Analysis walkthrough for the paper “Children actively select words that support learning” (authors removed, 2025).

Preparing the data

Read in data

We first set up the paths to relevant data files and read in the data.

#set up paths
#read_path: folder with processed data files; figure_path: where figures are written
read_path <- here::here("..","data","processed_data")
figure_path <- here::here("..","figures")

#Read in data file
#trial-level data for all experiments (rows carry trial_type, is_right, exclusion flags, demographics)
d_complete <- read.csv(here::here(read_path,"act_allData_processed.csv"))

#read in subject choices
#NOTE(review): readr::read_csv here vs. base read.csv above -- column types
#(tibble vs data.frame) may differ slightly between the two; confirm intentional
subject_choice <- read_csv(here::here(read_path,"act_subject_choice_processed.csv"))

#read in exclusions info
#one row per excluded/flagged participant with exclusion_reason and the two exclusion flags
exclusions <- read.csv(here::here(read_path,"all_exclusions.csv"))

Handle exclusions

Next, we handle and summarize exclusions.

Note that for the purposes of the paper, yoking errors are considered to be experimenter errors (i.e., the experimenter made an error in terms of which participant id to yoke a new participant to).

#slightly simplifying regrouping of exclusion reasons for more compact classification of exclusion types
#Collapse the detailed exclusion reasons into a smaller set of exclusion types
#for a more compact classification in the exclusion summary table.
#A named lookup vector maps each reason to its type; reasons not listed
#(and NA reasons) map to NA, exactly as the unmatched case of case_when would.
exclusions <- exclusions %>%
  mutate(
    exclusion_type = unname(c(
      "technical error"        = "technical or experimenter error",
      "experimenter error"     = "technical or experimenter error",
      "yoking error"           = "yoking-related issue",
      "no yoked participant"   = "yoking-related issue",
      "developmental concern"  = "participant characteristics",
      "language exposure"      = "participant characteristics",
      "pilot"                  = "outside official sample",
      "age exclusion"          = "outside official sample",
      "did not begin study"    = "outside official sample",
      "previous participant"   = "outside official sample",
      "sibling interference"   = "outside interference",
      "parent interference"    = "outside interference",
      "experiment ended early" = "incomplete"
    )[as.character(exclusion_reason)])
  )

  
#tally exclusions by experiment, exclusion type, and detailed reason;
#a participant counts as excluded if EITHER exclusion flag is "y"
exclusion_table <- exclusions %>%
  filter(exclude_new == "y" |exclude_active_passive_comparison=="y") %>%
  group_by(experiment,exclusion_type,exclusion_reason) %>%
  count()

#display the tally, omitting participants run outside the official sample
#(those are explained in the comment below the table)
exclusion_table %>%
  filter(exclusion_type!="outside official sample") %>%
  kable()
experiment exclusion_type exclusion_reason n
exp1 incomplete experiment ended early 2
exp1 participant characteristics language exposure 1
exp1 technical or experimenter error experimenter error 1
exp1 technical or experimenter error technical error 1
exp1 yoking-related issue no yoked participant 3
exp1 yoking-related issue yoking error 1
exp2 incomplete experiment ended early 16
exp2 outside interference parent interference 2
exp2 participant characteristics developmental concern 2
exp2 technical or experimenter error technical error 2
exp2 yoking-related issue yoking error 4
expS1 incomplete experiment ended early 1
expS1 outside interference parent interference 1
expS1 outside interference sibling interference 1
#participants "outside official sample" not counted towards tally
#these are participants who (a) were pilots, (b) were outside of the age range, but were run unofficially on the task anyway to provide equal opportunities to all museum visitors, or (c) had already participated in a version of the experiment

Finally, we remove exclusions from the main data file.

#Remove excluded participants from the main data file.
#A participant is excluded if EITHER flag is "y" (see the exclusion table
#filter above), so the retained rows are those where BOTH flags are "n".
#FIX: the original used `|` here, which would also retain participants flagged
#on exactly one criterion; `&` matches the complement of the exclusion filter
#and the `&` used for participant_info_post_exclusions below.
d_post_exclusion <- d_complete %>%
  filter(exclude_new == "n" & exclude_active_passive_comparison == "n")

#keep choice data only for subjects that survive exclusion
subject_choice_post_exclusion <- subject_choice %>%
  filter(subject %in% unique(d_post_exclusion$subject))

Summarize participant information

The next bit of code creates the summarizing datasets for reporting demographic characteristics.

participant_grouping_variables <- c("experiment","version","subject","yoked_id","condition","age","age_group","gender","hispanic_latino_yn","ethnicity_race","languages_besides_english_yn","languages_besides_english","l1","l1percent","l1hours","l2","l2percent","l2hours","l3","l3percent","l3hours","l4","l4percent","l4hours","exclude_new","exclude_active_passive_comparison","exclusion_reason","exclusion_reason_notes")

####summarize participants

# before exclusions
#one row per participant (plus their demographic columns) with the number of
#test trials and mean test accuracy
participant_info_pre_exclusions <- d_complete %>%
  dplyr::filter(trial_type == "test") %>%
  #convert the character vector of grouping columns to symbols and splice them in
  dplyr::group_by(!!!syms(participant_grouping_variables)) %>%
  dplyr::summarize(
    #after the filter above every remaining row is a test trial,
    #so a plain row count and a plain mean are equivalent to the
    #conditional sum/mean
    num_test_trials = dplyr::n(),
    mean_test_accuracy = mean(is_right, na.rm = TRUE)
  )

#after exclusions
#same per-participant summary, restricted to participants who pass BOTH
#exclusion criteria
participant_info_post_exclusions <- d_complete %>%
  dplyr::filter(trial_type == "test") %>%
  dplyr::filter(exclude_new == "n" & exclude_active_passive_comparison == "n") %>%
  dplyr::group_by(!!!syms(participant_grouping_variables)) %>%
  dplyr::summarize(
    #every remaining row is a test trial, so count/mean need no condition
    num_test_trials = dplyr::n(),
    mean_test_accuracy = mean(is_right, na.rm = TRUE)
  )

Overview of yoking characteristics

This provides an overview of how all participants were yoked in each experiment - basically, it’s a sanity check that the yoking process worked and we have yoked matches for each participant in the Active condition.

#overview over id matching
#one row per yoked_id, with the subject ids for each condition (active /
#passive / passive_mismatched) spread into columns; an NA cell flags a
#yoked pairing that is missing a match in that condition
yoked_matching_table <- participant_info_post_exclusions %>%
  ungroup() %>%
  select(experiment,yoked_id,condition,subject,age_group) %>%
  group_by(yoked_id) %>%
  pivot_wider(names_from=condition,values_from=subject)
yoked_matching_table %>%
  kable()
experiment yoked_id age_group active passive passive_mismatched
exp1 p106 four-year-olds p106 p108 NA
exp1 p109 three-year-olds p109 p124 NA
exp1 p110 four-year-olds p110 p121 NA
exp1 p111 four-year-olds p111 p122 NA
exp1 p112 five-year-olds p112 p127 NA
exp1 p113 five-year-olds p113 p128 NA
exp1 p114 five-year-olds p114 p131 NA
exp1 p115 four-year-olds p115 p123 NA
exp1 p117 five-year-olds p117 p133 NA
exp1 p118 five-year-olds p118 p135 NA
exp1 p125 five-year-olds p125 p136 NA
exp1 p126 four-year-olds p126 p129 NA
exp1 p130 four-year-olds p130 p132 NA
exp1 p134 four-year-olds p134 p137 NA
exp1 p138 five-year-olds p138 p139 NA
exp1 p140 four-year-olds p140 p142 NA
exp1 p141 three-year-olds p141 p152 NA
exp1 p143 four-year-olds p143 p144 NA
exp1 p145 four-year-olds p145 p150 NA
exp1 p146 five-year-olds p146 p157 NA
exp1 p147 four-year-olds p147 p151 NA
exp1 p148 three-year-olds p148 NA NA
exp1 p153 three-year-olds p153 p154 NA
exp1 p148 four-year-olds NA p155 NA
exp1 test185 four-year-olds test185 p156 NA
exp1 p158 three-year-olds p158 p160 NA
exp1 p159 five-year-olds p159 p163 NA
exp1 p161 four-year-olds p161 p170 NA
exp1 p162 three-year-olds p162 p164 NA
exp1 p165 three-year-olds p165 p167 NA
exp1 p166 five-year-olds p166 p168 NA
exp1 p169 five-year-olds p169 p174 NA
exp1 p171 three-year-olds p171 p173 NA
exp1 p172 four-year-olds p172 p176 NA
exp1 p175 three-year-olds p175 p177 NA
exp1 p178 five-year-olds p178 p180 NA
exp1 p181 five-year-olds p181 p182 NA
exp2 p301 four-year-olds p301 p535 p302
exp2 p303 three-year-olds p303 p532 p307
exp2 p304 four-year-olds p304 p503 p309
exp2 p305 five-year-olds p305 p546 p306
exp2 p308 five-year-olds p308 p595 p318
exp2 p311 four-year-olds p311 p526 p315
exp2 p312 three-year-olds p312 p531 p313
exp2 p314 three-year-olds p314 p562 p316
exp2 p317 four-year-olds p317 p553 p322
exp2 p319 five-year-olds p319 p541 p325
exp2 p320 three-year-olds p320 p547 p444
exp2 p323 four-year-olds p323 p587 p420
exp2 p326 five-year-olds p326 p520 p327
exp2 p328 four-year-olds p328 p572 p329
exp2 p330 five-year-olds p330 p558 p335
exp2 p331 four-year-olds p331 p516 p332
exp2 p333 four-year-olds p333 p602 p334
exp2 p336 five-year-olds p336 p600 p340
exp2 p337 three-year-olds p337 p505 p341
exp2 p339 four-year-olds p339 p598 p344
exp2 p342 three-year-olds p342 p552 p460
exp2 p345 five-year-olds p345 p563 p347
exp2 p346 four-year-olds p346 p596 p356
exp2 p348 five-year-olds p348 p566 p349
exp2 p351 five-year-olds p351 p597 p352
exp2 p353 three-year-olds p353 p506 p354
exp2 p355 five-year-olds p355 p549 p359
exp2 p357 four-year-olds p357 p543 p363
exp2 p358 three-year-olds p358 p504 p366
exp2 p360 five-year-olds p360 p528 p361
exp2 p362 five-year-olds p362 p593 p365
exp2 p364 four-year-olds p364 p578 p377
exp2 p367 three-year-olds p367 p502 p376
exp2 p368 five-year-olds p368 p574 p419
exp2 p371 four-year-olds p371 p517 p372
exp2 p373 five-year-olds p373 p555 p374
exp2 p375 five-year-olds p375 p542 p384
exp2 p378 four-year-olds p378 p603 p382
exp2 p380 five-year-olds p380 p599 p386
exp2 p381 three-year-olds p381 p521 p383
exp2 p385 three-year-olds p385 p559 p396
exp2 p387 four-year-olds p387 p605 p388
exp2 p389 four-year-olds p389 p530 p423
exp2 p390 five-year-olds p390 p529 p391
exp2 p392 four-year-olds p392 p524 p393
exp2 p394 five-year-olds p394 p565 p395
exp2 p397 five-year-olds p397 p525 p402
exp2 p398 three-year-olds p398 p537 p407
exp2 p400 four-year-olds p400 p590 p431
exp2 p403 three-year-olds p403 p515 p434
exp2 p405 five-year-olds p405 p575 p408
exp2 p409 four-year-olds p409 p544 p411
exp2 p410 five-year-olds p410 p536 p416
exp2 p412 four-year-olds p412 p519 p414
exp2 p413 five-year-olds p413 p514 p421
exp2 p417 four-year-olds p417 p551 p418
exp2 p422 five-year-olds p422 p561 p425
exp2 p424 five-year-olds p424 p545 p426
exp2 p427 five-year-olds p427 p570 p428
exp2 p429 five-year-olds p429 p594 p430
exp2 p432 five-year-olds p432 p554 p436
exp2 p433 four-year-olds p433 p501 p437
exp2 p435 three-year-olds p435 p557 p445
exp2 p438 four-year-olds p438 p523 p459
exp2 p439 five-year-olds p439 p522 p441
exp2 p442 five-year-olds p442 p533 p443
exp2 p446 five-year-olds p446 p567 p463
exp2 p447 four-year-olds p447 p588 p450
exp2 p448 three-year-olds p448 p513 p449
exp2 p451 three-year-olds p451 p507 p453
exp2 p454 three-year-olds p454 p508 p455
exp2 p456 three-year-olds p456 p518 p462
exp2 p457 four-year-olds p457 p534 p458
exp2 p568 three-year-olds p568 p573 p607
exp2 p569 three-year-olds p569 p576 p608
exp2 p564 three-year-olds p564 p583 p610
exp2 p584 three-year-olds p584 p586 p611
expS1 p203 five-year-olds p203 NA NA
expS1 p204 four-year-olds p204 NA NA
expS1 p205 four-year-olds p205 NA NA
expS1 p207 five-year-olds p207 NA NA
expS1 p208 four-year-olds p208 NA NA
expS1 p209 three-year-olds p209 NA NA
expS1 p210 three-year-olds p210 NA NA
expS1 p211 five-year-olds p211 NA NA
expS1 p212 four-year-olds p212 NA NA
expS1 p213 five-year-olds p213 NA NA
expS1 p214 four-year-olds p214 NA NA
expS1 p215 four-year-olds p215 NA NA
expS1 p216 five-year-olds p216 NA NA
expS1 p217 four-year-olds p217 NA NA
expS1 p219 five-year-olds p219 NA NA
expS1 p220 four-year-olds p220 NA NA
expS1 p222 five-year-olds p222 NA NA
expS1 p223 three-year-olds p223 NA NA
expS1 p224 five-year-olds p224 NA NA
expS1 p225 three-year-olds p225 NA NA
expS1 p227 three-year-olds p227 NA NA
expS1 p228 four-year-olds p228 NA NA
expS1 p229 five-year-olds p229 NA NA
expS1 p230 three-year-olds p230 NA NA
expS1 p231 three-year-olds p231 NA NA
expS1 p232 four-year-olds p232 NA NA
expS1 p233 five-year-olds p233 NA NA
expS1 p234 four-year-olds p234 NA NA
expS1 p235 three-year-olds p235 NA NA
expS1 p236 four-year-olds p236 NA NA
expS1 p237 five-year-olds p237 NA NA
expS1 p238 four-year-olds p238 NA NA
expS1 p239 five-year-olds p239 NA NA
expS1 p240 five-year-olds p240 NA NA
expS1 p241 three-year-olds p241 NA NA
expS1 p242 five-year-olds p242 NA NA
#note that there's a very small inconsistency for p148 - they are treated as a four-year-old, even though they are 3.99 years (child was two days away from birthday)
#we decided that this is within an acceptable margin, so exclusion is not merited

Demographics

An overview of demographic information for the final sample, after exclusions. The descriptives are summarized overall, for Experiments 1 and 2 only (i.e., the full sample in the main manuscript), and broken down by each experiment.

Age, Gender

Overall

####summarize demographics####
#overall demographic summary for the full post-exclusion sample
participant_demographics <- participant_info_post_exclusions %>%
  ungroup() %>%
  summarize(
    N = n(),
    mean_age = round(mean(age, na.rm = TRUE), 1),
    sd_age = round(sd(age, na.rm = TRUE), 1),
    min_age = min(age, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    #na.rm = TRUE added to the gender counts for robustness/consistency with
    #the other sums: a single NA would otherwise turn the whole count into NA
    count_female = sum(gender == "female", na.rm = TRUE),
    count_male = sum(gender == "male", na.rm = TRUE),
    l1_english_speaker = sum(l1 == "English", na.rm = TRUE),
    multiple_languages = sum(languages_besides_english_yn == "Yes", na.rm = TRUE)
  )
kable(participant_demographics)
N mean_age sd_age min_age max_age count_female count_male l1_english_speaker multiple_languages
339 4.6 0.8 3 6.04 185 154 330 75

Experiment 1 and 2 only

####summarize demographics####
#demographic summary for the main-manuscript sample (Experiments 1 and 2 only)
participant_demographics_exp12 <- participant_info_post_exclusions %>%
  filter(experiment %in% c("exp1","exp2")) %>%
  ungroup() %>%
  summarize(
    N = n(),
    mean_age = round(mean(age, na.rm = TRUE), 1),
    sd_age = round(sd(age, na.rm = TRUE), 1),
    min_age = min(age, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    #na.rm = TRUE added for robustness/consistency with the other sums
    count_female = sum(gender == "female", na.rm = TRUE),
    count_male = sum(gender == "male", na.rm = TRUE),
    l1_english_speaker = sum(l1 == "English", na.rm = TRUE),
    multiple_languages = sum(languages_besides_english_yn == "Yes", na.rm = TRUE)
  )
kable(participant_demographics_exp12)
N mean_age sd_age min_age max_age count_female count_male l1_english_speaker multiple_languages
303 4.6 0.8 3 6.04 166 137 294 65

By Experiment

####summarize demographics####
#demographic summary broken down by experiment
participant_demographics_by_exp <- participant_info_post_exclusions %>%
  ungroup() %>%
  group_by(experiment) %>%
  summarize(
    N = n(),
    mean_age = round(mean(age, na.rm = TRUE), 1),
    sd_age = round(sd(age, na.rm = TRUE), 1),
    min_age = min(age, na.rm = TRUE),
    max_age = max(age, na.rm = TRUE),
    #na.rm = TRUE added for robustness/consistency with the other sums
    count_female = sum(gender == "female", na.rm = TRUE),
    count_male = sum(gender == "male", na.rm = TRUE),
    l1_english_speaker = sum(l1 == "English", na.rm = TRUE),
    multiple_languages = sum(languages_besides_english_yn == "Yes", na.rm = TRUE)
  )
kable(participant_demographics_by_exp)
experiment N mean_age sd_age min_age max_age count_female count_male l1_english_speaker multiple_languages
exp1 72 4.6 0.8 3.03 6.040 45 27 70 17
exp2 231 4.6 0.8 3.00 5.997 121 110 224 48
expS1 36 4.7 0.8 3.36 5.950 19 17 36 10

Race, Ethnicity

Overall

#counts and proportions of race/ethnicity categories in the full post-exclusion sample
participant_info_post_exclusions %>%
  ungroup() %>%
  count(ethnicity_race, name = "count") %>%
  #each participant is one row, so sum(count) equals the total sample size
  mutate(percent = count / sum(count)) %>%
  kable()
ethnicity_race count percent
American Indian or Alaska Native,Asian 1 0.0029499
American Indian or Alaska Native,White 3 0.0088496
Asian 8 0.0235988
Asian,White 7 0.0206490
Black or African American 2 0.0058997
Black or African American,White 6 0.0176991
Native Hawaiian or Other Pacific Islander,White 1 0.0029499
Other 11 0.0324484
White 292 0.8613569
White,Other 2 0.0058997
NA 6 0.0176991
#counts and proportions of Hispanic/Latino responses in the full post-exclusion sample
participant_info_post_exclusions %>%
  ungroup() %>%
  count(hispanic_latino_yn, name = "count") %>%
  #each participant is one row, so sum(count) equals the total sample size
  mutate(percent = count / sum(count)) %>%
  kable()
hispanic_latino_yn count percent
No 310 0.9144543
Yes 22 0.0648968
NA 7 0.0206490

Experiment 1 and 2 only

#race/ethnicity for the main-manuscript sample (Experiments 1 and 2 only)
participant_info_post_exclusions %>%
  filter(experiment %in% c("exp1","exp2")) %>%
  ungroup() %>%
  count(ethnicity_race, name = "count") %>%
  #denominator is the exp1/exp2 subsample size (= sum of the counts)
  mutate(percent = count / sum(count)) %>%
  kable()
ethnicity_race count percent
American Indian or Alaska Native,Asian 1 0.0033003
American Indian or Alaska Native,White 3 0.0099010
Asian 8 0.0264026
Asian,White 6 0.0198020
Black or African American 2 0.0066007
Black or African American,White 6 0.0198020
Native Hawaiian or Other Pacific Islander,White 1 0.0033003
Other 9 0.0297030
White 260 0.8580858
White,Other 2 0.0066007
NA 5 0.0165017
#Hispanic/Latino responses for the main-manuscript sample (Experiments 1 and 2 only)
participant_info_post_exclusions %>%
  filter(experiment %in% c("exp1","exp2")) %>%
  ungroup() %>%
  count(hispanic_latino_yn, name = "count") %>%
  #denominator is the exp1/exp2 subsample size (= sum of the counts)
  mutate(percent = count / sum(count)) %>%
  kable()
hispanic_latino_yn count percent
No 276 0.9108911
Yes 21 0.0693069
NA 6 0.0198020

By Experiment

#race/ethnicity broken down by experiment, with proportions computed
#within each experiment
participant_info_post_exclusions %>%
  ungroup() %>%
  count(experiment, ethnicity_race, name = "count") %>%
  #percent within experiment: grouping by experiment makes sum(count) the
  #per-experiment N. This replaces the original per-group
  #nrow(df[df$experiment == unique(experiment), ]) subsetting, which
  #re-scanned the whole data frame for every group and would misbehave on NAs.
  group_by(experiment) %>%
  mutate(percent = count / sum(count)) %>%
  ungroup() %>%
  kable()
experiment ethnicity_race count percent
exp1 American Indian or Alaska Native,White 1 0.0138889
exp1 Asian 3 0.0416667
exp1 Black or African American 1 0.0138889
exp1 Black or African American,White 3 0.0416667
exp1 Other 2 0.0277778
exp1 White 58 0.8055556
exp1 White,Other 2 0.0277778
exp1 NA 2 0.0277778
exp2 American Indian or Alaska Native,Asian 1 0.0043290
exp2 American Indian or Alaska Native,White 2 0.0086580
exp2 Asian 5 0.0216450
exp2 Asian,White 6 0.0259740
exp2 Black or African American 1 0.0043290
exp2 Black or African American,White 3 0.0129870
exp2 Native Hawaiian or Other Pacific Islander,White 1 0.0043290
exp2 Other 7 0.0303030
exp2 White 202 0.8744589
exp2 NA 3 0.0129870
expS1 Asian,White 1 0.0277778
expS1 Other 2 0.0555556
expS1 White 32 0.8888889
expS1 NA 1 0.0277778
#Hispanic/Latino responses broken down by experiment, with proportions
#computed within each experiment
participant_info_post_exclusions %>%
  ungroup() %>%
  count(experiment, hispanic_latino_yn, name = "count") %>%
  #percent within experiment: grouping by experiment makes sum(count) the
  #per-experiment N (replaces the fragile per-group nrow(...) subsetting)
  group_by(experiment) %>%
  mutate(percent = count / sum(count)) %>%
  ungroup() %>%
  kable()
experiment hispanic_latino_yn count percent
exp1 No 62 0.8611111
exp1 Yes 7 0.0972222
exp1 NA 3 0.0416667
exp2 No 214 0.9264069
exp2 Yes 14 0.0606061
exp2 NA 3 0.0129870
expS1 No 34 0.9444444
expS1 Yes 1 0.0277778
expS1 NA 1 0.0277778

Experiment 1

The next section reports the analysis and results (with corresponding code) for Experiment 1.

#Experiment 1 subsets: trial-level data and the Active-condition sampling choices
exp1_d <- d_post_exclusion %>%
  filter(experiment == "exp1")

#comma-separated filter conditions are combined with AND
subject_choice_active_exp1 <- subject_choice_post_exclusion %>%
  filter(version == "1", condition == "active")

Sampling Phase

Descriptives

Children sampled objects very evenly throughout the sampling phase.

#count how often each participant chose each image during the learning phase
summarize_choice_counts <- exp1_d %>%
  filter(trial_type == "learning") %>%
  group_by(subject, stim_set, choice_image) %>%
  count()

#per-participant bar chart of choice counts
#FIX: geom_col() replaces geom_histogram(stat = "identity") -- the counts are
#already computed, so we want bars drawn at the given heights; the histogram
#geom with stat = "identity" produced the same bars but is a misuse of the
#geom and triggers a warning
summarize_choice_counts %>%
  ggplot(aes(choice_image, n)) +
  geom_col() +
  facet_wrap(~subject) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

#summarize gini by participant
#(one Gini coefficient per participant, averaged across that participant's values)
subject_choice_exp1_by_subj <- subject_choice_active_exp1 %>%
  group_by(subject) %>%
  summarize(gini = mean(gini))
#one-sample t-test of the participant-level Gini coefficients against 0
#(a Gini coefficient of 0 corresponds to perfectly even sampling)
t.test(subject_choice_exp1_by_subj$gini)
## 
##  One Sample t-test
## 
## data:  subject_choice_exp1_by_subj$gini
## t = 3.6454, df = 35, p-value = 0.0008588
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.01589862 0.05586064
## sample estimates:
##  mean of x 
## 0.03587963

Bootstrapping analysis

To determine whether participants sampled objects more equally than would be expected by chance, we used a bootstrapping analysis in which we simulated datasets in which all choices were random (n = 1000 total simulations), computed the Gini coefficient for each simulated dataset, and then computed the probability of observing the Gini coefficient from our current sample within this null distribution. This analysis found that our Gini coefficient was extremely unlikely to be observed for random behavior (p < .001; average Gini coefficient for random datasets: M = 0.393, 95% CI: [0.392, 0.394]), suggesting that children were much more likely than would be expected by chance to sample evenly across their choice options.

The code for this analysis can be found in a separate R script in the OSF/GitHub repository: 3_sampling_bootstrapping_analysis.R.

Test Phase

Overall Accuracy

Below, we report average test accuracy by condition.

#per-participant mean test accuracy in each condition
exp1_test_subj <- exp1_d %>%
  filter(trial_type == "test") %>%
  group_by(subject, condition) %>%
  summarize(mean_accuracy = mean(is_right, na.rm = TRUE))

#condition-level means with 95% confidence intervals (t distribution)
exp1_test_summarized <- exp1_test_subj %>%
  group_by(condition) %>%
  summarize(
    N = n(),
    avg_accuracy = mean(mean_accuracy),
    #half-width of the 95% CI around the condition mean
    avg_accuracy_ci = qt(0.975, N - 1) * sd(mean_accuracy, na.rm = TRUE) / sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )

#formatted table with the CI rendered as "[lower, upper]"
exp1_test_summarized %>%
  select(-avg_accuracy_ci) %>%
  mutate(
    ci = str_c("[", round(avg_accuracy_lower_ci, 3), ", ",
               round(avg_accuracy_upper_ci, 3), "]")
  ) %>%
  select(condition, N, avg_accuracy, ci) %>%
  kable(col.names = c("Condition", "N", "Average Accuracy", "CI"), digits = 3)
Condition N Average Accuracy CI
active 36 0.689 [0.628, 0.751]
passive 36 0.677 [0.617, 0.738]

Main Model

#### TODO check model convergence here ####
#fuller random-slope model, commented out -- per the accompanying text, the
#intercept-only structure below was the maximally converging one
#m <- glmer(is_right~condition_c*stim_set_c+(1+stim_set_c|subject)+(1+stim_set_c*condition_c|target_image)+(1+stim_set_c*condition_c|yoked_id), subset(exp1_d, trial_type=="test"),family="binomial")
#final model
#logistic mixed-effects model: accuracy ~ condition x block, with random
#intercepts for participants, items (target_image), and yoked pairs
m <- glmer(is_right~condition_c*stim_set_c+(1|subject)+(1|target_image)+(1|yoked_id), subset(exp1_d, trial_type=="test"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## is_right ~ condition_c * stim_set_c + (1 | subject) + (1 | target_image) +  
##     (1 | yoked_id)
##    Data: subset(exp1_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1408.5   1443.9   -697.3   1394.5     1145 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3713 -1.0997  0.4991  0.6799  1.2344 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.240054 0.48995 
##  yoked_id     (Intercept) 0.288185 0.53683 
##  target_image (Intercept) 0.001261 0.03552 
## Number of obs: 1152, groups:  subject, 72; yoked_id, 36; target_image, 8
## 
## Fixed effects:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             0.86240    0.12810   6.732 1.67e-11 ***
## condition_c             0.05710    0.17705   0.323    0.747    
## stim_set_c             -0.06269    0.13254  -0.473    0.636    
## condition_c:stim_set_c -0.05448    0.26465  -0.206    0.837    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ stm_s_
## condition_c  0.003              
## stim_set_c  -0.008 -0.004       
## cndtn_c:s__ -0.003 -0.008  0.012
#extract the fixed-effect estimates from the fitted model as a tidy data frame
model_estimates_exp1_main <- tidy(m, effects = "fixed")

To test for differences in word learning across condition, we fit a logistic mixed-effects model predicting participants’ trial-by-trial accuracy from condition (dummy coded), experiment block (centered), and their interaction. The maximally converging random-effects structure included random intercepts for participants, items, and yoked pairings (i.e., observations from participants who were yoked together were treated as non-independent). There was no significant difference between word learning accuracy in the Active (M = 68.9%, 95% CI [62.8%, 75.1%]) and the Yoked Passive condition (M = 67.7%, 95% CI [61.7%, 73.8%]), b = 0.06, z = 0.32, p = 0.75. We also found no significant condition by block interaction, b = -0.05, z = -0.21, p = 0.84.

Plot

#raincloud-style plot of test accuracy by condition: half-violins show the
#distribution of participant means, jittered dots are individual participants,
#the large point + error bar is the condition mean with its 95% CI, and the
#dashed line marks 0.5 (chance)
#NOTE(review): nearly identical plotting code recurs for the by-block and
#by-age-group figures -- consider factoring into a shared helper
ggplot(exp1_test_summarized,aes(condition,avg_accuracy,color=condition,fill=condition))+
  geom_half_violin(data= exp1_test_subj,aes(y=mean_accuracy),position = position_nudge(x = -.1, y = 0), width=0.8,adjust=1.5,trim = T, alpha = .8,color=NA,side="l")+
  geom_point(stat="identity",size=4,position = position_nudge(x = .2, y = 0))+
  geom_errorbar(aes(ymin=avg_accuracy_lower_ci,ymax=avg_accuracy_upper_ci),width=0,position = position_nudge(x = .2, y = 0))+
  geom_jitter(data= exp1_test_subj,aes(y=mean_accuracy),size=2,width=0.05,height=0.025,alpha=0.4,stroke=NA)+
  theme_cowplot()+
  theme(legend.position="none")+
  geom_hline(yintercept=0.5,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  ylim(-0.04,1.04)+
  scale_x_discrete(
    breaks=c("active","passive"),
    labels=c("Active","Yoked Passive"))+
  ylab("Word Learning Accuracy")+
  xlab("Condition")

#save to the shared figures folder (white background so the PNG is not transparent)
ggsave(here(figure_path,"exp1_accuracy_overall.png"),width=9,height=6,bg = "white")

Test Accuracy By Block

Here, we report average test accuracy by condition, additionally broken down by Block.

#per-participant accuracy within each block (stim_set 1 -> BLOCK 1, 2 -> BLOCK 2)
exp1_test_subj_block <- exp1_d %>%
  filter(trial_type == "test") %>%
  group_by(subject, condition, stim_set) %>%
  summarize(mean_accuracy = mean(is_right, na.rm = TRUE)) %>%
  mutate(block = ifelse(stim_set == 1, "BLOCK 1", "BLOCK 2"))

#condition x block means with 95% confidence intervals
exp1_test_summarized_block <- exp1_test_subj_block %>%
  group_by(condition, stim_set, block) %>%
  summarize(
    N = n(),
    avg_accuracy = mean(mean_accuracy),
    #half-width of the 95% CI
    avg_accuracy_ci = qt(0.975, N - 1) * sd(mean_accuracy, na.rm = TRUE) / sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )

#formatted table, one row per block x condition
exp1_test_summarized_block %>%
  ungroup() %>%
  arrange(block) %>%
  select(-avg_accuracy_ci, -stim_set) %>%
  mutate(
    ci = str_c("[", round(avg_accuracy_lower_ci, 3), ", ",
               round(avg_accuracy_upper_ci, 3), "]")
  ) %>%
  select(block, condition, N, avg_accuracy, ci) %>%
  kable(col.names = c("Block", "Condition", "N", "Average Accuracy", "CI"), digits = 3)
Block Condition N Average Accuracy CI
BLOCK 1 active 36 0.698 [0.624, 0.772]
BLOCK 1 passive 36 0.681 [0.593, 0.768]
BLOCK 2 active 36 0.681 [0.609, 0.752]
BLOCK 2 passive 36 0.674 [0.601, 0.747]
#raincloud-style accuracy plot as above, faceted by block:
#half-violins = distribution of participant means, jittered dots = participants,
#large point + error bar = condition mean with 95% CI, dashed line = 0.5
ggplot(exp1_test_summarized_block,aes(condition,avg_accuracy,color=condition,fill=condition))+
  geom_half_violin(data= exp1_test_subj_block,aes(y=mean_accuracy),position = position_nudge(x = -.1, y = 0), width=0.8,adjust=1.5,trim = T, alpha = .8,color=NA,side="l")+
  geom_point(stat="identity",size=4,position = position_nudge(x = .2, y = 0))+
  geom_errorbar(aes(ymin=avg_accuracy_lower_ci,ymax=avg_accuracy_upper_ci),width=0,position = position_nudge(x = .2, y = 0))+
  geom_jitter(data= exp1_test_subj_block,aes(y=mean_accuracy),size=2,width=0.05,height=0.025,alpha=0.4,stroke=NA)+
  theme_cowplot()+
  theme(legend.position="none")+
  geom_hline(yintercept=0.5,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  ylim(-0.04,1.04)+
  scale_x_discrete(
    breaks=c("active","passive"),
    labels=c("Active","Yoked Passive"))+
  ylab("Word Learning Accuracy")+
  xlab("Condition")+
  facet_grid(~block)

#save to the shared figures folder (white background so the PNG is not transparent)
ggsave(here(figure_path,"exp1_accuracy_by_block.png"),width=9,height=6,bg = "white")

Interaction with Age

#center age
#grand-mean centering so the model intercept reflects the average-age child.
#FIX: na.rm = TRUE instead of the abbreviation T, which is an ordinary
#(reassignable) variable rather than a reserved word.
exp1_d <- exp1_d %>%
  mutate(age_c = age - mean(age, na.rm = TRUE))

#### TODO check model convergence here ####
#fuller random-slope model, commented out -- per the accompanying text, random
#intercepts were the maximally converging structure
#m <- glmer(is_right~condition*stim_set_c+(1+stim_set_c*condition_c|subject)+(1+stim_set_c*condition_c|target_image)+(1+stim_set_c*condition_c|yoked_id), subset(exp1_d, trial_type=="test"),family="binomial")
#accuracy ~ condition x age, with random intercepts for participants, items,
#and yoked pairs
m <- glmer(is_right~condition_c*age_c+(1|subject)+(1|target_image)+(1|yoked_id), subset(exp1_d, trial_type=="test"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * age_c + (1 | subject) + (1 | target_image) +  
##     (1 | yoked_id)
##    Data: subset(exp1_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1406.8   1442.1   -696.4   1392.8     1145 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5304 -1.0873  0.4941  0.6827  1.2214 
## 
## Random effects:
##  Groups       Name        Variance  Std.Dev.
##  subject      (Intercept) 0.2509544 0.50095 
##  yoked_id     (Intercept) 0.2457447 0.49573 
##  target_image (Intercept) 0.0007412 0.02723 
## Number of obs: 1152, groups:  subject, 72; yoked_id, 36; target_image, 8
## 
## Fixed effects:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        0.86281    0.12378   6.970 3.16e-12 ***
## condition_c        0.05466    0.17899   0.305    0.760    
## age_c              0.20766    0.14702   1.412    0.158    
## condition_c:age_c -0.06748    0.22613  -0.298    0.765    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ age_c 
## condition_c  0.002              
## age_c        0.030 -0.001       
## cndtn_c:g_c -0.007  0.043  0.024
#95% Wald confidence intervals; rows 4:7 select the four fixed effects
#(the first three rows are the random-effect parameters) -- NOTE(review):
#these hard-coded indices break if the random-effects structure changes
confint(m, method="Wald")[4:7,]
##                         2.5 %    97.5 %
## (Intercept)        0.62019533 1.1054179
## condition_c       -0.29614595 0.4054701
## age_c             -0.08049984 0.4958150
## condition_c:age_c -0.51068593 0.3757174
#fixed-effect estimates of the age-interaction model as a tidy data frame
model_estimates_exp1_age_interaction <- tidy(m, effects = "fixed")

To test for possible differences in the effect of active learning across age, we fit a logistic mixed-effects model predicting participants’ trial-by-trial accuracy from condition (dummy coded), age (centered), and their interaction. We included random intercepts for participants, items and for yoked pairings (i.e., observations from participants who were yoked together were treated as non-independent). We found no evidence for an interaction between age and condition (p = 0.77).

Here, we plot the condition effect for each age group.

#per-participant accuracy within each age group
exp1_test_subj_age_group <- exp1_d %>%
  filter(trial_type == "test") %>%
  group_by(subject, condition, age_group) %>%
  summarize(mean_accuracy = mean(is_right, na.rm = TRUE)) %>%
  mutate(
    #explicit factor levels so age groups order youngest-to-oldest in plots/tables
    age_group_f = factor(
      age_group,
      levels = c("three-year-olds", "four-year-olds", "five-year-olds")
    )
  )

#condition x age-group means with 95% confidence intervals
exp1_test_summarized_age_group <- exp1_test_subj_age_group %>%
  group_by(condition, age_group_f) %>%
  summarize(
    N = n(),
    avg_accuracy = mean(mean_accuracy),
    #half-width of the 95% CI
    avg_accuracy_ci = qt(0.975, N - 1) * sd(mean_accuracy, na.rm = TRUE) / sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )

#formatted table, one row per age group x condition
exp1_test_summarized_age_group %>%
  ungroup() %>%
  select(-avg_accuracy_ci) %>%
  mutate(
    ci = str_c("[", round(avg_accuracy_lower_ci, 3), ", ",
               round(avg_accuracy_upper_ci, 3), "]")
  ) %>%
  select(age_group_f, condition, N, avg_accuracy, ci) %>%
  kable(col.names = c("Age Group", "Condition", "N", "Average Accuracy", "CI"), digits = 3)
Age Group Condition N Average Accuracy CI
three-year-olds active 9 0.597 [0.47, 0.725]
four-year-olds active 14 0.732 [0.64, 0.824]
five-year-olds active 13 0.707 [0.583, 0.83]
three-year-olds passive 8 0.648 [0.548, 0.749]
four-year-olds passive 15 0.621 [0.516, 0.726]
five-year-olds passive 13 0.760 [0.653, 0.866]
#raincloud-style accuracy plot as above, faceted by age group:
#half-violins = distribution of participant means, jittered dots = participants,
#large point + error bar = condition mean with 95% CI, dashed line = 0.5
ggplot(exp1_test_summarized_age_group,aes(condition,avg_accuracy,color=condition,fill=condition))+
  geom_half_violin(data= exp1_test_subj_age_group,aes(y=mean_accuracy),position = position_nudge(x = -.1, y = 0), width=0.8,adjust=1.5,trim = T, alpha = .8,color=NA,side="l")+
  geom_point(stat="identity",size=4,position = position_nudge(x = .2, y = 0))+
  geom_errorbar(aes(ymin=avg_accuracy_lower_ci,ymax=avg_accuracy_upper_ci),width=0,position = position_nudge(x = .2, y = 0))+
  geom_jitter(data= exp1_test_subj_age_group,aes(y=mean_accuracy),size=2,width=0.05,height=0.025,alpha=0.4,stroke=NA)+
  theme_cowplot()+
  theme(legend.position="none")+
  geom_hline(yintercept=0.5,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  ylim(-0.04,1.04)+
  scale_x_discrete(
    breaks=c("active","passive"),
    labels=c("Active","Yoked Passive"))+
  ylab("Word Learning Accuracy")+
  xlab("Condition")+
  facet_grid(~age_group_f)

#save to the shared figures folder (white background so the PNG is not transparent)
ggsave(here(figure_path,"exp1_accuracy_by_age_group.png"),width=9,height=6,bg = "white")

Additional robustness check: 3-way interaction between block, age, and condition

#three-way interaction
# Robustness check: logistic mixed-effects model on Exp 1 test trials,
# predicting correct choice from condition x block (stim_set_c) x age
# (all centered), with a by-subject random slope for block and crossed
# random intercepts for target image and yoked pair id.
m <- glmer(is_right~condition_c*stim_set_c*age_c+(1+stim_set_c|subject)+(1|target_image)+(1|yoked_id), subset(exp1_d, trial_type=="test"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition_c * stim_set_c * age_c + (1 + stim_set_c |  
##     subject) + (1 | target_image) + (1 | yoked_id)
##    Data: subset(exp1_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1411.2   1476.9   -692.6   1385.2     1139 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3341 -1.0247  0.4835  0.6868  1.4080 
## 
## Random effects:
##  Groups       Name        Variance  Std.Dev.  Corr 
##  subject      (Intercept) 3.010e-01 0.5486600      
##               stim_set_c  6.352e-01 0.7969813 -0.45
##  yoked_id     (Intercept) 2.293e-01 0.4788059      
##  target_image (Intercept) 7.536e-08 0.0002745      
## Number of obs: 1152, groups:  subject, 72; yoked_id, 36; target_image, 8
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   0.89587    0.12580   7.121 1.07e-12 ***
## condition_c                   0.05305    0.18839   0.282    0.778    
## stim_set_c                   -0.12998    0.17236  -0.754    0.451    
## age_c                         0.21140    0.14992   1.410    0.159    
## condition_c:stim_set_c       -0.04951    0.33118  -0.150    0.881    
## condition_c:age_c            -0.07702    0.23757  -0.324    0.746    
## stim_set_c:age_c              0.03355    0.20605   0.163    0.871    
## condition_c:stim_set_c:age_c -0.04041    0.41055  -0.098    0.922    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtn_ stm_s_ age_c  cn_:__ cnd_:_ st__:_
## condition_c -0.001                                          
## stim_set_c  -0.164  0.002                                   
## age_c        0.031 -0.003 -0.005                            
## cndtn_c:s__  0.001 -0.207 -0.001  0.001                     
## cndtn_c:g_c -0.009  0.039  0.002  0.024 -0.002              
## stm_st_c:g_ -0.005  0.000  0.069 -0.160 -0.006 -0.010       
## cndtn_:__:_ -0.001 -0.003 -0.011 -0.011  0.047 -0.203  0.025
# Wald 95% CIs; the leading rows of confint() are variance-component
# parameters, so rows 6-13 select the intercept and the 7 fixed effects.
confint(m, method="Wald")[6:13,]
##                                    2.5 %    97.5 %
## (Intercept)                   0.64930012 1.1424452
## condition_c                  -0.31619474 0.4222939
## stim_set_c                   -0.46779372 0.2078299
## age_c                        -0.08243029 0.5052299
## condition_c:stim_set_c       -0.69861398 0.5995892
## condition_c:age_c            -0.54265744 0.3886124
## stim_set_c:age_c             -0.37029989 0.4374082
## condition_c:stim_set_c:age_c -0.84505863 0.7642486
# Keep a tidy table of the fixed-effect estimates for in-text reporting.
model_estimates_exp1_three_way <- tidy(m, effects = "fixed")

To explore whether the effect of condition depended on age, we fit a logistic mixed-effects model predicting the likelihood of making a correct choice from the three-way interaction between condition (centered; Active = 0.5; Passive = -0.5), block (unit centered), age (mean-centered), and all lower-order effects. The model included random intercepts for participant, target image, and yoked participant id, as well as a by-participant random slope for block. There was no evidence of a three-way interaction between condition, age, and block, p = 0.92.

Experiment S1

# Experiment S1 trial-level data (post-exclusion).
expS1_d <- d_post_exclusion %>%
  filter(experiment == "expS1")

# Active-condition sampling choices for version 2A of the task.
subject_choice_active_expS1 <- subject_choice %>%
  filter(version == "2A", condition == "active")

Sampling Phase

Sampling Choices

Participants differentially selected words during the Sampling Phase depending on the number of occurrences of the word during the Exposure Phase. Children were more likely to sample items the less frequently they were labeled during the Exposure Phase.

#choices by exposure type
# Scaffold with every subject x exposure type x block combination so that
# never-chosen cells appear as explicit zeros after the join below.
# NOTE(review): the grid uses all subjects with learning trials, while the
# counts below are restricted to the active condition — assumes only
# active-condition children have learning trials; verify.
expS1_choice_data_frame <- expand.grid(
  subject=unique(filter(expS1_d,trial_type=="learning")$subject),choice_kind=c("high","medium","low","no"),stim_set=c(1,2)) %>%
  mutate(yoked_id = subject)
# Count how often each exposure type was sampled per subject and block.
expS1_choice_type <-  expS1_d %>%
  filter(condition=="active"&trial_type=="learning") %>%
  mutate(choice_kind=as.factor(choice_kind)) %>%
  group_by(subject,yoked_id,stim_set, choice_kind,choice_image) %>%
  summarize(choice_num=n())
# Join counts onto the scaffold; missing combinations become 0 choices.
# choice_prob divides by 3 — presumably 3 sampling trials per block; confirm.
expS1_choice_type_complete <- expS1_choice_data_frame %>%
  left_join(expS1_choice_type) %>%
  mutate(choice_num=ifelse(is.na(choice_num),0,choice_num)) %>%
  mutate(choice_prob = choice_num/3)

# Within-subject-corrected summary of sampling counts per exposure type
# (collapsed across blocks), with CI bounds for plotting.
expS1_summarize_choice_type <- summarySEwithin(
  data=expS1_choice_type_complete,
  measurevar="choice_num",
  withinvars=c("choice_kind"),
  idvar="subject") %>%
  mutate(
    lower_ci = choice_num - ci,
    upper_ci = choice_num + ci)
# Same summary broken down by block (stim_set), with a display label.
expS1_summarize_choice_type_block <- summarySEwithin(
  data=expS1_choice_type_complete,
  measurevar="choice_num",
  withinvars=c("stim_set","choice_kind"),
  idvar="subject") %>%
  mutate(
    lower_ci = choice_num - ci,
    upper_ci = choice_num + ci) %>%
  mutate(block = ifelse(stim_set==1,"BLOCK 1","BLOCK 2"))

# Table of average sampling frequency per exposure type, ordered from
# least- to most-often sampled (keeps only the mean and CI bounds).
expS1_summarize_choice_type %>%
  select(!c(N, ciMult, choice_num_norm, sd, se, ci)) %>%
  arrange(choice_num) %>%
  kable()
choice_kind choice_num lower_ci upper_ci
high 1.138889 0.9440421 1.333736
medium 1.361111 1.1829445 1.539278
low 1.513889 1.3435327 1.684245
no 1.986111 1.8068565 2.165366
#plot
# Bar plot of average sampling frequency by exposure type; x-axis ordered
# from most (5) to fewest (0) exposures so the preference for less-exposed
# items reads left to right.
expS1_sampling_choices_plot <- ggplot(expS1_summarize_choice_type,aes(choice_kind,choice_num, color=choice_kind, fill=choice_kind))+
  geom_bar(stat="identity",size=2,alpha=0.05)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),color="black",width=0.1)+
  theme_classic()+
  scale_x_discrete(labels=c("5 Exposures","2 Exposures","1 Exposure","0 Exposures"),limits=c("high","medium","low","no"))+
  theme_classic()+
  scale_color_viridis(discrete=T)+
  scale_fill_viridis(discrete=T)+
  xlab("Exposure Item")+
  ylab("Average Sampling Choice Frequency")+
  theme(axis.title = element_text(size=20),
        axis.text.x  = element_text(size=13),
        axis.text.y=element_text(size=16),
        strip.text=element_text(size=16),
        legend.position="none")
expS1_sampling_choices_plot

# Save the standalone version of this panel.
ggsave(here(figure_path,"expS1_sampling_choices_by_exposure.png"),width=9,height=6)

Lower Exposure Choices

Descriptives

As in Experiment 2, children weighed the relative informativeness of their options. Participants preferentially selected words during the Sampling Phase that were heard less frequently during the Exposure Phase.

#overall
# Per-subject proportion of lower-exposure choices, by choice pair type.
expS1_sum_subj_choices <- expS1_d %>%
  filter(trial_type=="learning") %>%
  group_by(subject,age,age_group,condition,choice_trial_type) %>%
  summarize(
    avg_lower_exposure_choice=mean(lower_exposure_choice))
##by block
# Same, additionally split by block (stim_set).
expS1_sum_subj_choices_block <- expS1_d %>%
  filter(trial_type=="learning") %>%
  group_by(subject,age,age_group,condition,choice_trial_type,stim_set) %>%
  summarize(
    avg_lower_exposure_choice=mean(lower_exposure_choice))

#overall
# Collapse across pair types to one overall proportion per subject
# (mean of pair-type means, so each pair type is weighted equally).
expS1_overall_exposure_choices_subj <- expS1_sum_subj_choices %>%
  group_by(subject,age,age_group,condition) %>%
  summarize(mean_lower_exposure_choice=mean(avg_lower_exposure_choice))
##by block
# Same collapse, retaining the block split.
expS1_overall_exposure_choices_subj_block <- expS1_sum_subj_choices_block %>%
  group_by(subject,age,age_group,condition,stim_set) %>%
  summarize(mean_lower_exposure_choice=mean(avg_lower_exposure_choice))

## summarize
#within-subject-corrected
## by block
# Morey-style within-subject CIs across blocks (active condition only).
expS1_overall_exposure_choices_within_corrected_block <- summarySEwithin(filter(expS1_overall_exposure_choices_subj_block, condition=="active"),"mean_lower_exposure_choice", betweenvars=c("condition"),withinvars=c("stim_set"),idvar="subject" ) %>%
  mutate(lower_ci = mean_lower_exposure_choice - ci,upper_ci = mean_lower_exposure_choice  + ci)
#uncorrected (reported)
# Overall (block-collapsed) mean and uncorrected 95% CI, as reported.
expS1_overall_exposure_choices <- summarySE(filter(expS1_overall_exposure_choices_subj, condition=="active"),"mean_lower_exposure_choice", groupvars=c("condition"))%>%
  mutate(lower_ci = mean_lower_exposure_choice - ci,upper_ci = mean_lower_exposure_choice  + ci)
## by block
# FIX: previously grouped only by condition, which pooled both blocks'
# rows into a single summary despite the "_block" name and the per-block
# input data; grouping must also include stim_set to summarize per block.
expS1_overall_exposure_choices_block <- summarySE(filter(expS1_overall_exposure_choices_subj_block, condition=="active"),"mean_lower_exposure_choice", groupvars=c("condition","stim_set"))%>%
  mutate(lower_ci = mean_lower_exposure_choice - ci,upper_ci = mean_lower_exposure_choice  + ci)
Overall
#t-tests
#overall
# One-sample t-test of the per-subject proportion of lower-exposure
# choices against chance (0.5), active condition only.
# (var.equal has no effect in a one-sample t-test.)
t.test(subset(expS1_overall_exposure_choices_subj, condition=="active")$mean_lower_exposure_choice,mu=0.5, var.equal=T)
## 
##  One Sample t-test
## 
## data:  subset(expS1_overall_exposure_choices_subj, condition == "active")$mean_lower_exposure_choice
## t = 5.0172, df = 35, p-value = 1.523e-05
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5689089 0.6625726
## sample estimates:
## mean of x 
## 0.6157407
By Block
Block 1
#Block 1
# Same chance-level test restricted to block 1 (stim_set == 1).
t.test(filter(expS1_overall_exposure_choices_subj_block, condition=="active"&stim_set==1)$mean_lower_exposure_choice,mu=0.5, var.equal=T)
## 
##  One Sample t-test
## 
## data:  filter(expS1_overall_exposure_choices_subj_block, condition == "active" & stim_set == 1)$mean_lower_exposure_choice
## t = 2.2831, df = 35, p-value = 0.02861
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5071834 0.6224462
## sample estimates:
## mean of x 
## 0.5648148
Block 2
#Block 2
# Same chance-level test restricted to block 2 (stim_set == 2).
t.test(filter(expS1_overall_exposure_choices_subj_block, condition=="active"&stim_set==2)$mean_lower_exposure_choice,mu=0.5, var.equal=T)
## 
##  One Sample t-test
## 
## data:  filter(expS1_overall_exposure_choices_subj_block, condition == "active" & stim_set == 2)$mean_lower_exposure_choice
## t = 5.2337, df = 35, p-value = 7.909e-06
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.6020177 0.7313156
## sample estimates:
## mean of x 
## 0.6666667

Age Effects

# Linear regression: does the overall proportion of lower-exposure
# choices change with age (in years), active condition only?
m <- lm(mean_lower_exposure_choice~ age,subset(expS1_overall_exposure_choices_subj, condition=="active"))
summary(m)
## 
## Call:
## lm(formula = mean_lower_exposure_choice ~ age, data = subset(expS1_overall_exposure_choices_subj, 
##     condition == "active"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24176 -0.09419 -0.01167  0.03970  0.30462 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.39463    0.13290   2.969  0.00544 **
## age          0.04737    0.02806   1.688  0.10054   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1349 on 34 degrees of freedom
## Multiple R-squared:  0.07734,    Adjusted R-squared:  0.0502 
## F-statistic:  2.85 on 1 and 34 DF,  p-value: 0.1005
# 95% CIs for the intercept and the age slope.
confint(m)
##                    2.5 %    97.5 %
## (Intercept)  0.124552761 0.6647031
## age         -0.009654893 0.1043893
# Tidy coefficient table for the age regression, with residual df and
# 95% CI bounds attached for in-text reporting (confint computed once).
ci_bounds <- confint(m)
model_estimates_exps1_age <- broom::tidy(m) %>%
  mutate(
    df = m$df.residual,
    lower_ci = ci_bounds[, 1],
    upper_ci = ci_bounds[, 2]
  )

We also fit a linear regression model predicting the proportion of lower-exposure choices from age. Choosing the lower-exposure item did not significantly increase with age, b = 0.05, 95% Wald CI = [-0.01, 0.10], t(34) = 1.69, p = .10.

Below is a simple plot of the effect of age.

# Scatter of per-subject lower-exposure choice proportion against age,
# with a linear fit; dashed line marks chance (0.5).
ggplot(subset(expS1_overall_exposure_choices_subj, condition=="active"),
       aes(age,mean_lower_exposure_choice))+
  geom_hline(yintercept = 0.5,linetype="dashed")+
  geom_point()+
  geom_smooth(method="lm")+
  xlab("Age (in years)")+
  ylab("Proportion Lower Exposure Choices")

ggsave(here(figure_path,"expS1_lower_exposure_choices_across_age.png"),width=9,height=6,bg = "white")

Below, we break down the preference for the lower exposure choice for each age group.

3-year-olds
# Chance-level test (0.5) for 3-year-olds, active condition.
t.test(subset(expS1_overall_exposure_choices_subj, condition=="active"&age_group=="three-year-olds")$mean_lower_exposure_choice,mu=0.5, var.equal=T)
## 
##  One Sample t-test
## 
## data:  subset(expS1_overall_exposure_choices_subj, condition == "active" & age_group == "three-year-olds")$mean_lower_exposure_choice
## t = 0.70989, df = 8, p-value = 0.4979
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4167252 0.6573488
## sample estimates:
## mean of x 
##  0.537037
4-year-olds
# Chance-level test (0.5) for 4-year-olds, active condition.
t.test(subset(expS1_overall_exposure_choices_subj, condition=="active"&age_group=="four-year-olds")$mean_lower_exposure_choice,mu=0.5, var.equal=T)
## 
##  One Sample t-test
## 
## data:  subset(expS1_overall_exposure_choices_subj, condition == "active" & age_group == "four-year-olds")$mean_lower_exposure_choice
## t = 3.3389, df = 12, p-value = 0.0059
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5534538 0.7542385
## sample estimates:
## mean of x 
## 0.6538462
5-year-olds
# Chance-level test (0.5) for 5-year-olds, active condition.
t.test(subset(expS1_overall_exposure_choices_subj, condition=="active"&age_group=="five-year-olds")$mean_lower_exposure_choice,mu=0.5, var.equal=T)
## 
##  One Sample t-test
## 
## data:  subset(expS1_overall_exposure_choices_subj, condition == "active" & age_group == "five-year-olds")$mean_lower_exposure_choice
## t = 6.9041, df = 13, p-value = 1.079e-05
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5899760 0.6719288
## sample estimates:
## mean of x 
## 0.6309524

Robustness Check

Robustness Check: Model-Based approach with logistic mixed-effects models.

We obtain similar effects when fitting a trial-by-trial logistic mixed-effects model predicting whether participants choose the lower-exposure item.

Overall
#overall
# Intercept-only logistic mixed model: is the log-odds of choosing the
# lower-exposure item above 0 (i.e., above chance)? Crossed random
# intercepts for subject and for each on-screen image option.
m <- glmer(lower_exposure_choice~1+(1|subject)+(1|available_image1)+(1|available_image2),data=subset(expS1_d,trial_type=="learning"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: lower_exposure_choice ~ 1 + (1 | subject) + (1 | available_image1) +  
##     (1 | available_image2)
##    Data: subset(expS1_d, trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##    583.0    599.3   -287.5    575.0      428 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3296 -1.2792  0.7677  0.7794  0.8674 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0.00000  0.000   
##  available_image1 (Intercept) 0.00000  0.000   
##  available_image2 (Intercept) 0.02756  0.166   
## Number of obs: 432, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.4800     0.1163   4.128 3.66e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Block 1
# Same intercept-only model restricted to block 1 trials.
m <- glmer(lower_exposure_choice~1+(1|subject)+(1|available_image1)+(1|available_image2),data=subset(expS1_d,trial_type=="learning"&stim_set==1), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: lower_exposure_choice ~ 1 + (1 | subject) + (1 | available_image1) +  
##     (1 | available_image2)
##    Data: subset(expS1_d, trial_type == "learning" & stim_set == 1)
## 
##      AIC      BIC   logLik deviance df.resid 
##    303.8    317.3   -147.9    295.8      212 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1392 -1.1392  0.8778  0.8778  0.8778 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0        0       
##  available_image1 (Intercept) 0        0       
##  available_image2 (Intercept) 0        0       
## Number of obs: 216, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.2607     0.1372     1.9   0.0575 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Block 2
#stimSet 2
# Same intercept-only model restricted to block 2 trials.
m <- glmer(lower_exposure_choice~1+(1|subject)+(1|available_image1)+(1|available_image2),data=subset(expS1_d,trial_type=="learning"&stim_set==2), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: lower_exposure_choice ~ 1 + (1 | subject) + (1 | available_image1) +  
##     (1 | available_image2)
##    Data: subset(expS1_d, trial_type == "learning" & stim_set == 2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    283.0    296.5   -137.5    275.0      212 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4142 -1.4142  0.7071  0.7071  0.7071 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  subject          (Intercept) 4.002e-14 2e-07   
##  available_image1 (Intercept) 0.000e+00 0e+00   
##  available_image2 (Intercept) 0.000e+00 0e+00   
## Number of obs: 216, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.6931     0.1443   4.802 1.57e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Age

Does the effect depend on age?

#center age
# Mean-center age so the glmer intercept reflects the sample-average age.
# (Spell out TRUE rather than the reassignable shorthand T.)
expS1_d <- expS1_d %>%
  mutate(age_c = age - mean(age, na.rm = TRUE))

#overall
## note: singular fit due to random intercept for available_image1 does not change estimate
# Does the lower-exposure preference vary with (centered) age?
m <- glmer(lower_exposure_choice~age_c+(1|subject)+(1|available_image1)+(1|available_image2),data=subset(expS1_d,trial_type=="learning"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ age_c + (1 | subject) + (1 | available_image1) +  
##     (1 | available_image2)
##    Data: subset(expS1_d, trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##    582.4    602.7   -286.2    572.4      427 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5165 -1.1844  0.7128  0.7853  0.9954 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 1.165e-10 1.079e-05
##  available_image1 (Intercept) 2.320e-10 1.523e-05
##  available_image2 (Intercept) 2.922e-02 1.709e-01
## Number of obs: 432, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.4837     0.1176   4.115 3.88e-05 ***
## age_c         0.2032     0.1244   1.633    0.102    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr)
## age_c 0.036 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Plot

#proportion choices for different choice pairs
# Group-level lower-exposure choice probability per pair type, with
# uncorrected t-based 95% CIs. Each choice_trial_type names the pair of
# exposure levels shown (e.g. "high_low" = 5- vs 1-exposure item), and the
# case_when blocks recover the higher- and lower-frequency member of the pair.
expS1_sum_choices <- expS1_sum_subj_choices %>%
  group_by(condition,choice_trial_type) %>%
  summarize(
    N=n(),
    lower_exposure_choice_prob=mean(avg_lower_exposure_choice),
    sd_le = sd(avg_lower_exposure_choice,na.rm=TRUE),
    ci = qt(0.975, N-1)*sd_le/sqrt(N),
    lower_ci = lower_exposure_choice_prob - ci,
    upper_ci = lower_exposure_choice_prob + ci,
    ) %>%
  mutate(
    highest_freq = case_when(
      choice_trial_type %in% c("high_low","high_no","high_medium") ~ "high",
      choice_trial_type %in% c("low_medium","medium_no") ~ "medium",
      choice_trial_type %in% c("low_no") ~ "low",
      TRUE ~ NA_character_),
    low_freq = case_when(
      choice_trial_type %in% c("low_no","high_no","medium_no") ~ "no",
      choice_trial_type %in% c("low_medium","high_low") ~ "low",
      choice_trial_type %in% c("high_medium") ~ "medium",
      TRUE ~ NA_character_)
    ) %>%
  mutate(
    # Readable facet labels: high/medium/low = 5/2/1 exposures.
    highest_freq_f = factor(highest_freq, levels=c("high","medium","low"), labels=c("5 Exposures","2 Exposures","1 Exposure"))
  )
##by block
# Same pair-type summary split by block, using exact binomial CIs.
# NOTE(review): binom.test expects an integer count, but
# sum(avg_lower_exposure_choice) sums per-subject proportions and may be
# non-integer — confirm these sums are whole numbers here (or switch to a
# CI method for proportions of means).
expS1_sum_choices_block <- expS1_sum_subj_choices_block %>%
  group_by(condition,choice_trial_type,stim_set) %>%
  summarize(
    N=n(),
    lower_exposure_choice_prob=mean(avg_lower_exposure_choice),
    ci.lower=binom.test(sum(avg_lower_exposure_choice),N)$conf.int[1],
    ci.upper=binom.test(sum(avg_lower_exposure_choice),N)$conf.int[2]) %>%
  mutate(
    # Recover the higher- and lower-frequency member of each choice pair.
    highest_freq = case_when(
      choice_trial_type %in% c("high_low","high_no","high_medium") ~ "high",
      choice_trial_type %in% c("low_medium","medium_no") ~ "medium",
      choice_trial_type %in% c("low_no") ~ "low",
      TRUE ~ NA_character_),
    low_freq = case_when(
      choice_trial_type %in% c("low_no","high_no","medium_no") ~ "no",
      choice_trial_type %in% c("low_medium","high_low") ~ "low",
      choice_trial_type %in% c("high_medium") ~ "medium",
      TRUE ~ NA_character_)
    ) %>%
  mutate(
    highest_freq_f = factor(highest_freq, levels=c("high","medium","low"), labels=c("5 Exposures","2 Exposures","1 Exposure"))
  ) %>%
  mutate(
    block = ifelse(stim_set==1,"BLOCK 1","BLOCK 2")
  )
Overall
#plot
# Lower-exposure choice probability for each pair type (active condition),
# faceted by the higher-frequency member of the pair; dotted line = chance.
expS1_sampling_pairs <- ggplot(
  subset(expS1_sum_choices, condition == "active"),
  aes(low_freq, lower_exposure_choice_prob, color = low_freq, fill = low_freq)
) +
  geom_bar(stat = "identity", size = 2, alpha = 0.05) +
  geom_errorbar(aes(ymin = lower_ci, ymax = upper_ci), width = 0) +
  theme_classic() +
  scale_x_discrete(labels = c("2", "1", "0"), limits = c("medium", "low", "no")) +
  ylab("Lower Exposure Choice Probability") +
  xlab("Lower Exposure Item") +
  geom_hline(yintercept = .5, linetype = "dotted") +
  facet_wrap(~highest_freq_f) +
  ylim(0, 1) +
  # Fixed viridis-style colors keyed to exposure level.
  scale_color_manual(
    limits = c("medium", "low", "no"),
    values = c("#31688EFF", "#35B779FF", "#FDE725FF")
  ) +
  scale_fill_manual(
    limits = c("medium", "low", "no"),
    values = c("#31688EFF", "#35B779FF", "#FDE725FF")
  ) +
  theme(
    axis.title = element_text(size = 20),
    axis.text.x = element_text(size = 16),
    axis.text.y = element_text(size = 16),
    strip.text = element_text(size = 16),
    legend.position = "none"
  )
expS1_sampling_pairs

ggsave(here(figure_path, "expS1_sampling_lowerExposureChoices.png"), width = 11, height = 7)
Combined Sampling Choices and Exposure Pairs Figure
# save in one combined plot (Figure S1)
# Panel A: sampling frequency by exposure type; Panel B: pairwise choices.
plot_grid(expS1_sampling_choices_plot,expS1_sampling_pairs, labels=c("A","B"))

ggsave(here(figure_path,"expS1_sampling_all.png"),width=11,height=7)
By Block

Here, we break down the lower exposure choice proportions by block.

# p1/p2 were near-identical copy-pasted plots differing only in the block
# filter; factor the shared ggplot code into a helper so the two panels
# cannot drift out of sync.
plot_block_lower_exposure <- function(block_label) {
  # One panel: lower-exposure choice probability per pair type for a single
  # block (active condition), with exact binomial CIs and chance line at 0.5.
  ggplot(subset(expS1_sum_choices_block,condition=="active"&block==block_label),aes(low_freq,lower_exposure_choice_prob,color=highest_freq_f))+
    geom_bar(stat="identity",size=2,fill="white")+
    geom_errorbar(aes(ymin=ci.lower,ymax=ci.upper),width=0)+
    theme_classic()+
    scale_x_discrete(labels=c("2 Exposures","1 Exposure","0 Exposures"),limits=c("medium","low","no"))+
    ylab("Lower Exposure Choice Probability")+
    xlab("Lower Exposure Item")+
    geom_hline(yintercept=.5,linetype="dotted")+
    facet_wrap(~highest_freq_f)+
    ylim(0,1)+
    scale_color_viridis(discrete=T)+
    theme(axis.title = element_text(size=20),
          axis.text.x  = element_text(angle=90,vjust=0.5,size=16),
          axis.text.y=element_text(size=16),
          strip.text=element_text(size=16),
          legend.position="none")
}

p1 <- plot_block_lower_exposure("BLOCK 1")
p2 <- plot_block_lower_exposure("BLOCK 2")

# Side-by-side panels: A = Block 1, B = Block 2.
plot_grid(p1,p2, labels=c("A","B"))

ggsave(here(figure_path,"expS1_sampling_lowerExposureChoices_by_block.png"),width=11,height=7)

Informativeness Difference Analysis

Model

We also fit a logistic mixed-effects model predicting children’s likelihood of choosing a given item from its difference in informativeness relative to the alternative option (collapsing across blocks), including random intercepts for participant and for each item option. Children’s likelihood of choosing an item increased as it became more informative relative to the alternative item presented on a given trial.

#more complex model (resulted in a singular fit, so we pruned the random slope - results are robust across random effect specifications)
#m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1+cur_exp_log_rel_freq_diff12|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,trial_type=="learning"),family=binomial)
# Logistic mixed model: probability of choosing image 1 as a function of
# its relative informativeness (log relative frequency difference vs. the
# alternative), with random intercepts for subject and both image options.
m_inf_expS1 <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,trial_type=="learning"),family=binomial)
summary(m_inf_expS1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##    537.3    557.7   -263.7    527.3      427 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.5368 -0.7534  0.3325  0.7666  2.0713 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  subject          (Intercept) 9.865e-02 0.314082
##  available_image1 (Intercept) 1.452e-07 0.000381
##  available_image2 (Intercept) 1.447e-01 0.380411
## Number of obs: 432, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.05486    0.18015   0.305    0.761    
## cur_exp_log_rel_freq_diff12  0.54041    0.07430   7.273 3.51e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.042
#Wald 95% CIs for the fixed effects (rows 4:5; earlier rows are random-effect SDs)
confint(m_inf_expS1, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.2982244 0.4079468
## cur_exp_log_rel_freq_diff12  0.3947821 0.6860428
Robustness: Interaction with Block

The effect did not interact with block.

#robustness: does the informativeness effect vary by block? add interaction with centered block (stim_set_c)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12*stim_set_c+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,trial_type=="learning"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 * stim_set_c +  
##     (1 | subject) + (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##    537.9    566.3   -261.9    523.9      425 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2357 -0.7403  0.2729  0.7833  1.9189 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 1.160e-01 0.3405848
##  available_image1 (Intercept) 1.472e-08 0.0001213
##  available_image2 (Intercept) 1.549e-01 0.3936256
## Number of obs: 432, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                             0.04952    0.18546   0.267    0.789    
## cur_exp_log_rel_freq_diff12             0.54738    0.07536   7.264 3.77e-13 ***
## stim_set_c                              0.24927    0.21935   1.136    0.256    
## cur_exp_log_rel_freq_diff12:stim_set_c  0.21341    0.13921   1.533    0.125    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cr_____12 stm_s_
## cr_xp____12  0.041                 
## stim_set_c   0.029  0.016          
## c_____12:__ -0.015  0.141     0.073
#Wald 95% CIs for the fixed effects (rows 4:7; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:7,]
##                                              2.5 %    97.5 %
## (Intercept)                            -0.31398028 0.4130222
## cur_exp_log_rel_freq_diff12             0.39967626 0.6950753
## stim_set_c                             -0.18065216 0.6791898
## cur_exp_log_rel_freq_diff12:stim_set_c -0.05944032 0.4862521
Robustness: Block 1 alone
#robustness: refit the informativeness model on Block 1 trials only (stim_set == 1)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,trial_type=="learning"&stim_set==1),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, trial_type == "learning" & stim_set == 1)
## 
##      AIC      BIC   logLik deviance df.resid 
##    285.8    302.7   -137.9    275.8      211 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0339 -0.8018 -0.4965  0.8646  2.0101 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  subject          (Intercept) 0.000e+00 0.000000
##  available_image1 (Intercept) 1.156e-09 0.000034
##  available_image2 (Intercept) 8.500e-02 0.291546
## Number of obs: 216, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -0.06394    0.18124  -0.353    0.724    
## cur_exp_log_rel_freq_diff12  0.40736    0.09137   4.458 8.26e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.052 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs for the fixed effects (rows 4:5; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.4191669 0.2912866
## cur_exp_log_rel_freq_diff12  0.2282756 0.5864464
Robustness: Block 2 alone
#robustness: refit the informativeness model on Block 2 trials only (stim_set == 2)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,trial_type=="learning"&stim_set==2),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, trial_type == "learning" & stim_set == 2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    261.4    278.3   -125.7    251.4      211 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8889 -0.7176  0.3825  0.7020  1.9951 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 1.538e-09 3.921e-05
##  available_image1 (Intercept) 0.000e+00 0.000e+00
##  available_image2 (Intercept) 1.902e-01 4.362e-01
## Number of obs: 216, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   0.1715     0.2210   0.776    0.438    
## cur_exp_log_rel_freq_diff12   0.6306     0.1062   5.938 2.89e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.049 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs for the fixed effects (rows 4:5; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.2616197 0.6046305
## cur_exp_log_rel_freq_diff12  0.4224639 0.8387851
Robustness: Interaction with age

The effect did not interact with age.

#robustness: does the informativeness effect vary by age? add interaction with centered age (age_c)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12*age_c+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,trial_type=="learning"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 * age_c + (1 |  
##     subject) + (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##    538.4    566.9   -262.2    524.4      425 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6770 -0.7671  0.3221  0.7605  2.0347 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 8.021e-02 0.2832106
##  available_image1 (Intercept) 5.317e-08 0.0002306
##  available_image2 (Intercept) 1.523e-01 0.3902039
## Number of obs: 432, groups:  
## subject, 36; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.03871    0.18208   0.213    0.832    
## cur_exp_log_rel_freq_diff12        0.55117    0.07448   7.400 1.36e-13 ***
## age_c                             -0.17860    0.14777  -1.209    0.227    
## cur_exp_log_rel_freq_diff12:age_c  0.09517    0.08425   1.130    0.259    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cr_____12 age_c 
## cr_xp____12  0.032                 
## age_c        0.012 -0.141          
## cr_____12:_ -0.078  0.098     0.052
#Wald 95% CIs for the fixed effects (rows 4:7; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:7,]
##                                         2.5 %    97.5 %
## (Intercept)                       -0.31815036 0.3955783
## cur_exp_log_rel_freq_diff12        0.40518763 0.6971531
## age_c                             -0.46823453 0.1110306
## cur_exp_log_rel_freq_diff12:age_c -0.06996849 0.2603026
3-year-olds
#age-group subset: three-year-olds only (bobyqa optimizer specified for this fit)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,condition=="active"&trial_type=="learning"&age_group=="three-year-olds"),glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, condition == "active" & trial_type == "learning" &  
##     age_group == "three-year-olds")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    147.2    160.6    -68.6    137.2      103 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.24409 -0.82146  0.00644  0.88991  1.56805 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0.0000   0.0000  
##  available_image1 (Intercept) 0.0000   0.0000  
##  available_image2 (Intercept) 0.1737   0.4168  
## Number of obs: 108, groups:  
## subject, 9; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                   0.1906     0.2623   0.727  0.46737   
## cur_exp_log_rel_freq_diff12   0.4189     0.1301   3.219  0.00129 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.221 
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs for the fixed effects (rows 4:5; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.3234897 0.7047796
## cur_exp_log_rel_freq_diff12  0.1638633 0.6740084
4-year-olds
#age-group subset: four-year-olds only (bobyqa optimizer specified for this fit)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,condition=="active"&trial_type=="learning"&age_group=="four-year-olds"),glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, condition == "active" & trial_type == "learning" &  
##     age_group == "four-year-olds")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    200.0    215.3    -95.0    190.0      151 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6365 -0.7787  0.3924  0.7361  2.1109 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0.25     0.5     
##  available_image1 (Intercept) 0.00     0.0     
##  available_image2 (Intercept) 0.00     0.0     
## Number of obs: 156, groups:  
## subject, 13; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                   0.1714     0.2264   0.757    0.449    
## cur_exp_log_rel_freq_diff12   0.5345     0.1207   4.427 9.55e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.065 
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs for the fixed effects (rows 4:5; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.2724204 0.6152039
## cur_exp_log_rel_freq_diff12  0.2978422 0.7710685
5-year-olds
#age-group subset: five-year-olds only (bobyqa optimizer specified for this fit)
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(expS1_d,condition=="active"&trial_type=="learning"&age_group=="five-year-olds"),glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(expS1_d, condition == "active" & trial_type == "learning" &  
##     age_group == "five-year-olds")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    204.5    220.1    -97.2    194.5      163 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4925 -0.6847 -0.3147  0.7370  2.6634 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0.02024  0.1423  
##  available_image1 (Intercept) 0.00000  0.0000  
##  available_image2 (Intercept) 0.41120  0.6412  
## Number of obs: 168, groups:  
## subject, 14; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  -0.1840     0.2937  -0.627    0.531    
## cur_exp_log_rel_freq_diff12   0.6763     0.1372   4.929 8.26e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 -0.059
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs for the fixed effects (rows 4:5; earlier rows are random-effect SDs)
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.7595826 0.3915601
## cur_exp_log_rel_freq_diff12  0.4073718 0.9452108

Plot

The plot depicts the likelihood of choosing an item (Image 1) as a function of the difference in informativeness to the alternative option (Image 2) in Experiment S1. The blue line represents the fit of the logistic mixed-effects model, with ±1 standard-error bands. Dots represent individual responses, with violin plots visualizing the distribution.

#create data frame to predict: a grid over the observed range of the informativeness difference
expS1_inf_pX <- data.frame(
  cur_exp_log_rel_freq_diff12 = seq(
    from = min(expS1_d$cur_exp_log_rel_freq_diff12, na.rm = TRUE),
    to = max(expS1_d$cur_exp_log_rel_freq_diff12, na.rm = TRUE),
    by = 0.1
  )
)
#population-level predictions (no random effects) on the response scale
expS1_inf_pY <- predictSE(m_inf_expS1, expS1_inf_pX, re.form = NA, type = "response")
#attach fit and band limits (band = fit +/- 1 SE) in a single mutate
expS1_inf_pX <- expS1_inf_pX %>%
  mutate(
    fit = expS1_inf_pY$fit,
    se.fit = expS1_inf_pY$se.fit,
    is_image1_choice = fit,
    y_lower = fit - se.fit,
    y_upper = fit + se.fit
  )

#create plot: model fit (line + band) over raw responses (violins + jittered points)
expS1_informativeness <- ggplot(expS1_inf_pX, aes(x = cur_exp_log_rel_freq_diff12, y = is_image1_choice)) +
  geom_vline(xintercept = 0, linetype = "solid") +
  geom_violinh(data = filter(expS1_d, trial_type == "learning"),
               aes(y = is_image1_choice, group = is_image1_choice),
               scale = "count", width = 0.1, trim = TRUE) +
  geom_jitter(data = filter(expS1_d, trial_type == "learning"),
              aes(y = is_image1_choice, group = is_image1_choice), height = 0.01) +
  geom_smooth(aes(ymin = y_lower, ymax = y_upper), stat = "identity") +
  geom_hline(yintercept = 0.5, linetype = "dotted") +  # chance level
  coord_cartesian(xlim = c(-4, 4), ylim = c(-0.05, 1.05)) +
  scale_x_continuous(breaks = seq(-4, 4, 1)) +
  theme_classic(base_size = 14) +
  labs(x = "Informativeness Difference Image 1 - Image 2",
       y = "Probability of Choosing Image 1") +
  theme(axis.title = element_text(size = 20),
        axis.text  = element_text(size = 16))
expS1_informativeness

ggsave(here(figure_path,"expS1_informativeness_analysis.png"),width=9,height=6,bg="white")

Test Phase

Overall Accuracy

Children successfully learned the novel labels overall, achieving high accuracy in the Test Phase. Accuracy was similar in Block 1 and Block 2.

#per-subject mean test accuracy
expS1_test_subj <- expS1_d %>%
  filter(trial_type=="test") %>%
  group_by(subject,age,age_group,condition) %>%
  summarize(
    mean_accuracy = mean(is_right,na.rm=TRUE)
  )

#condition-level summary with a 95% t-based CI across subjects
expS1_test_summarized <- expS1_test_subj %>%
  group_by(condition) %>%
  summarize(
    N=n(),
    avg_accuracy = mean(mean_accuracy),
    avg_accuracy_ci = qt(0.975, N-1)*sd(mean_accuracy,na.rm=TRUE)/sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci,
  )
#format the CI as a string and render the table
expS1_test_summarized %>%
  select(-avg_accuracy_ci) %>%
  mutate(
    ci = str_c("[",round(avg_accuracy_lower_ci,3),", ", round(avg_accuracy_upper_ci,3),"]")) %>%
  select(condition,N,avg_accuracy,ci) %>%
  kable(col.names=c("Condition", "N", "Average Accuracy","CI"),digits=3)
Condition N Average Accuracy CI
active 36 0.795 [0.742, 0.848]
#per-subject mean test accuracy, split by block
expS1_test_subj_block <- expS1_d %>%
  filter(trial_type=="test") %>%
  group_by(subject,age,age_group,condition,stim_set) %>%
  summarize(
    mean_accuracy = mean(is_right,na.rm=TRUE)
  ) %>%
  mutate(block = ifelse(stim_set==1,"BLOCK 1","BLOCK 2"))

#block-level summary with a 95% t-based CI across subjects
expS1_test_summarized_block <- expS1_test_subj_block %>%
  group_by(condition,stim_set, block) %>%
  summarize(
    N=n(),
    avg_accuracy = mean(mean_accuracy),
    avg_accuracy_ci = qt(0.975, N-1)*sd(mean_accuracy,na.rm=TRUE)/sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci,
  )
#format the CI as a string and render the table
expS1_test_summarized_block %>%
  ungroup() %>%
  arrange(block) %>%
  select(-avg_accuracy_ci,-stim_set) %>%
  mutate(
    ci = str_c("[",round(avg_accuracy_lower_ci,3),", ", round(avg_accuracy_upper_ci,3),"]")) %>%
  select(block,condition,N,avg_accuracy,ci) %>%
  kable(col.names=c("Block","Condition", "N", "Average Accuracy","CI"),digits=3)
Block Condition N Average Accuracy CI
BLOCK 1 active 36 0.812 [0.745, 0.88]
BLOCK 2 active 36 0.778 [0.714, 0.842]
#just a quick t-test
#paired across subjects: Block 1 vs Block 2 mean accuracy
#(rows are subject-aligned because expS1_test_subj_block is ordered by subject via group_by)
t.test(
  filter(expS1_test_subj_block,stim_set==1)$mean_accuracy,
  filter(expS1_test_subj_block,stim_set==2)$mean_accuracy,
  paired=T)
## 
##  Paired t-test
## 
## data:  filter(expS1_test_subj_block, stim_set == 1)$mean_accuracy and filter(expS1_test_subj_block, stim_set == 2)$mean_accuracy
## t = 0.90303, df = 35, p-value = 0.3727
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -0.04333669  0.11278113
## sample estimates:
## mean difference 
##      0.03472222

Age effect

There was no significant change in test accuracy across age.

### Main Model
#test accuracy predicted from centered age, with random intercepts for subject and target image
m <- glmer(is_right~age_c+(1|subject)+(1|target_image), subset(expS1_d, trial_type=="test"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ age_c + (1 | subject) + (1 | target_image)
##    Data: subset(expS1_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##    572.1    589.5   -282.0    564.1      572 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9887  0.2828  0.3795  0.5213  0.9982 
## 
## Random effects:
##  Groups       Name        Variance  Std.Dev. 
##  subject      (Intercept) 6.255e-01 7.909e-01
##  target_image (Intercept) 1.669e-10 1.292e-05
## Number of obs: 576, groups:  subject, 36; target_image, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   1.5363     0.1805   8.511   <2e-16 ***
## age_c         0.2117     0.2174   0.974     0.33    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr)
## age_c 0.041 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Below is a plot of the age effect.

# Per-subject test accuracy as a function of age, with a linear trend line.
ggplot(expS1_test_subj,
       aes(x = age, y = mean_accuracy)) +
  geom_hline(yintercept = 0.5, linetype = "dashed") +  # chance level
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Age (in years)",
       y = "Test Accuracy in Experiment S1")

ggsave(here(figure_path,"expS1_test_accuracy_across_age.png"),width=9,height=6,bg = "white")

Relationships between Exposure, Sampling, and Test

#### combine test and choice data
#test data: per-subject, per-item test accuracy and RTs
expS1_subj_accuracy <-  expS1_d %>%
  filter(trial_type=="test") %>%
  group_by(subject,yoked_id,age,age_group,condition,stim_set,target_kind, target_image) %>%
  summarize(acc=mean(is_right),
            mean_rt = mean(rt),
            median_rt = median(rt))
#choice data: one row per (yoked_id, block, item) for active-condition participants
#NOTE(review): rows are drawn from trial_type=="test" (not sampling trials); `present` is a
#marker column dropped after the join - confirm this is the intended item enumeration
expS1_choice_kind_active <- expS1_d %>%
  filter(condition=="active"&trial_type=="test") %>%
  group_by(yoked_id,stim_set, target_kind, target_image) %>%
  summarize(present=1) %>%
  rename(
    choice_kind=target_kind
  )
#combine
#NOTE(review): expS1_choice_type_complete (carrying choice_num/choice_image) must already be
#defined earlier in the document; this statement overwrites it with the joined version
expS1_choice_type_complete <- left_join(expS1_choice_kind_active,expS1_choice_type_complete) %>%
  select(-choice_image,-present)

#overall item-level: attach Exposure Phase frequency (5/2/1/0 by item kind) and
#total frequency (exposure + sampling-phase choices)
expS1_test_choice_type <-left_join(expS1_subj_accuracy, expS1_choice_type_complete) %>%
  mutate(
    exposure_frequency = case_when(
      target_kind=="high" ~5,
      target_kind=="medium" ~ 2,
      target_kind=="low" ~ 1,
      target_kind=="no" ~ 0),
    total_frequency = exposure_frequency + choice_num
  )

#trial-by-trial level
expS1_test_d <- expS1_d %>%
  filter(trial_type=="test") %>%
  select(subject,trial_type,trial_num,yoked_id,age,age_group,condition,stim_set,target_kind, target_image,is_right)

#same frequency variables at the trial level, plus three codings of block:
#centered (stim_set_c: -0.5/0.5), Block-1-referenced (stim_set_1: Block 1 = 0),
#and Block-2-referenced (stim_set_2: Block 2 = 0)
expS1_test_d_choice_type <-expS1_test_d %>%
  left_join(expS1_choice_type_complete) %>%
  mutate(
    exposure_frequency = case_when(
      target_kind=="high" ~5,
      target_kind=="medium" ~ 2,
      target_kind=="low" ~ 1,
      target_kind=="no" ~ 0),
    total_frequency = exposure_frequency + choice_num
  ) %>%
  mutate(
    stim_set_c = ifelse(stim_set==1,-0.5,0.5),
    stim_set_1 = ifelse(stim_set==1,0,1),
    stim_set_2 = ifelse(stim_set==1,-1,0)
  )

Exposure and Sampling Frequency Predicting Test Accuracy

We explored how children’s frequency of exposure during the Exposure Phase and the Sampling Phase predicted accuracy. We fit a logistic mixed-effects model predicting trial-by-trial test accuracy from Exposure Phase frequency and Sampling Phase frequency. We included the interaction between each of these predictors and experiment block (centered) as fixed effects, as well as random intercepts for participants and items. We did not find evidence that frequency during training was associated with test performance.

#exposure frequency and choices
#check interaction with block
#test accuracy from Exposure Phase frequency and Sampling Phase frequency (choice_num),
#each interacted with centered block; random intercepts for subject and target image
m <- glmer(is_right ~(exposure_frequency+choice_num)*stim_set_c+(1|subject)+(1|target_image),data=expS1_test_d_choice_type,family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ (exposure_frequency + choice_num) * stim_set_c + (1 |  
##     subject) + (1 | target_image)
##    Data: expS1_test_d_choice_type
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    575.5    610.4   -279.8    559.5      568 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4391  0.2887  0.3765  0.5117  1.0723 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.6728   0.8202  
##  target_image (Intercept) 0.0000   0.0000  
## Number of obs: 576, groups:  subject, 36; target_image, 8
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    1.266987   0.353503   3.584 0.000338 ***
## exposure_frequency            -0.012029   0.062487  -0.193 0.847346    
## choice_num                     0.210802   0.160298   1.315 0.188488    
## stim_set_c                     0.491716   0.642409   0.765 0.444019    
## exposure_frequency:stim_set_c  0.006436   0.124969   0.051 0.958930    
## choice_num:stim_set_c         -0.511567   0.320668  -1.595 0.110641    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ chc_nm stm_s_ ex_:__
## expsr_frqnc -0.605                            
## choice_num  -0.780  0.367                     
## stim_set_c   0.001 -0.054  0.050              
## expsr_fr:__ -0.049  0.013  0.066 -0.665       
## chc_nm:st__  0.040  0.066 -0.143 -0.864  0.367
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs (random-effect rows are NA by design for the Wald method)
confint(m, method="Wald")
##                                    2.5 %    97.5 %
## .sig01                                NA        NA
## .sig02                                NA        NA
## (Intercept)                    0.5741337 1.9598412
## exposure_frequency            -0.1345015 0.1104432
## choice_num                    -0.1033759 0.5249808
## stim_set_c                    -0.7673835 1.7508151
## exposure_frequency:stim_set_c -0.2385001 0.2513712
## choice_num:stim_set_c         -1.1400651 0.1169307

Plot

Below, we visualize the relationship between exposure frequency and test accuracy (plot 1) and sampling frequency and test accuracy (plot 2), split by block.

#exposure frequency and test accuracy: linear trend per block
ggplot(expS1_test_d_choice_type, aes(x = exposure_frequency, y = is_right)) +
  geom_smooth(method = "lm") +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(-0.1, 1.1)) +
  facet_wrap(~stim_set) +
  labs(x = "Exposure Phase Frequency",
       y = "Test Accuracy")

#sampling frequency and test accuracy: linear trend per block
ggplot(expS1_test_d_choice_type, aes(x = choice_num, y = is_right)) +
  geom_smooth(method = "lm") +
  scale_y_continuous(breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1), limits = c(-0.1, 1.1)) +
  facet_wrap(~stim_set) +
  labs(x = "Sampling Phase Frequency",
       y = "Test Accuracy")

Model Fit for each Block

We also investigated the predictions of the model for each block by recentering the block predictor on Block 1 and Block 2, respectively. Higher sampling frequency was marginally associated with higher test accuracy specifically in the first experiment block, but otherwise there were no notable effects in Block 1 or in Block 2.

Model results centered on Block 1:

#block 1
#recenter block on Block 1 (stim_set_1 = 0 for Block 1) so the simple effects of
#exposure_frequency and choice_num are Block-1 effects
m <- glmer(is_right ~(exposure_frequency+choice_num)*stim_set_1+(1|subject)+(1|target_image),data=expS1_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ (exposure_frequency + choice_num) * stim_set_1 + (1 |  
##     subject) + (1 | target_image)
##    Data: expS1_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##    575.5    610.4   -279.8    559.5      568 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4391  0.2887  0.3765  0.5117  1.0723 
## 
## Random effects:
##  Groups       Name        Variance  Std.Dev. 
##  subject      (Intercept) 6.728e-01 0.8202604
##  target_image (Intercept) 4.341e-08 0.0002084
## Number of obs: 576, groups:  subject, 36; target_image, 8
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                    1.021099   0.477441   2.139   0.0325 *
## exposure_frequency            -0.015245   0.087814  -0.174   0.8622  
## choice_num                     0.466602   0.242371   1.925   0.0542 .
## stim_set_1                     0.491718   0.642414   0.765   0.4440  
## exposure_frequency:stim_set_1  0.006434   0.124970   0.051   0.9589  
## choice_num:stim_set_1         -0.511566   0.320670  -1.595   0.1106  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ chc_nm stm__1 e_:__1
## expsr_frqnc -0.585                            
## choice_num  -0.808  0.283                     
## stim_set_1  -0.672  0.435  0.604              
## expsr_f:__1  0.411 -0.703 -0.199 -0.665       
## chc_nm:s__1  0.611 -0.214 -0.756 -0.864  0.367
#Wald 95% CIs (random-effect rows are NA by design for the Wald method)
confint(m, method="Wald")
##                                      2.5 %    97.5 %
## .sig01                                  NA        NA
## .sig02                                  NA        NA
## (Intercept)                    0.085332193 1.9568653
## exposure_frequency            -0.187357245 0.1568671
## choice_num                    -0.008436249 0.9416394
## stim_set_1                    -0.767389558 1.7508253
## exposure_frequency:stim_set_1 -0.238502845 0.2513701
## choice_num:stim_set_1         -1.140068228 0.1169358

Model results centered on Block 2:

#block 2
#recenter block on Block 2 (stim_set_2 = 0 for Block 2) so the simple effects of
#exposure_frequency and choice_num are Block-2 effects
m <- glmer(is_right ~(exposure_frequency+choice_num)*stim_set_2+(1|subject)+(1|target_image),data=expS1_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ (exposure_frequency + choice_num) * stim_set_2 + (1 |  
##     subject) + (1 | target_image)
##    Data: expS1_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##    575.5    610.4   -279.8    559.5      568 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4390  0.2887  0.3765  0.5117  1.0723 
## 
## Random effects:
##  Groups       Name        Variance  Std.Dev. 
##  subject      (Intercept) 6.727e-01 8.202e-01
##  target_image (Intercept) 5.592e-10 2.365e-05
## Number of obs: 576, groups:  subject, 36; target_image, 8
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                    1.512857   0.477845   3.166  0.00155 **
## exposure_frequency            -0.008798   0.088921  -0.099  0.92118   
## choice_num                    -0.045006   0.209913  -0.214  0.83023   
## stim_set_2                     0.491685   0.642429   0.765  0.44406   
## exposure_frequency:stim_set_2  0.006451   0.124971   0.052  0.95883   
## choice_num:stim_set_2         -0.511540   0.320675  -1.595  0.11067   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ chc_nm stm__2 e_:__2
## expsr_frqnc -0.680                            
## choice_num  -0.836  0.464                     
## stim_set_2   0.673 -0.506 -0.622              
## expsr_f:__2 -0.484  0.712  0.330 -0.665       
## chc_nm:s__2 -0.551  0.304  0.655 -0.864  0.367
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald 95% CIs (random-effect rows are NA by design for the Wald method)
confint(m, method="Wald")
##                                    2.5 %    97.5 %
## .sig01                                NA        NA
## .sig02                                NA        NA
## (Intercept)                    0.5762986 2.4494149
## exposure_frequency            -0.1830800 0.1654839
## choice_num                    -0.4564275 0.3664154
## stim_set_2                    -0.7674527 1.7508237
## exposure_frequency:stim_set_2 -0.2384878 0.2513894
## choice_num:stim_set_2         -1.1400510 0.1169709

Total Frequency and Test Accuracy

As in Experiment 2, we also explored whether the combined frequency of children’s exposure to each object-label association during the Exposure Phase and the Sampling Phase would predict their later test accuracy (not reported in supplement). As above, there was little evidence of a relationship between total frequency and test accuracy and no interaction with block.

## total frequency
#no significant interaction with block (the summary below shows
#total_frequency:stim_set_c p = .948); a stale note here previously said "significant"
#model also adds a random intercept for yoked_id
m <- glmer(is_right ~total_frequency*stim_set_c+(1|subject)+(1|target_image)+(1|yoked_id),data=expS1_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ total_frequency * stim_set_c + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: expS1_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##    577.8    608.3   -281.9    563.8      569 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9874  0.2818  0.3757  0.5044  1.0353 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  yoked_id     (Intercept) 0.63679  0.7980  
##  subject      (Intercept) 0.02511  0.1585  
##  target_image (Intercept) 0.00000  0.0000  
## Number of obs: 576, groups:  yoked_id, 36; subject, 36; target_image, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.556497   0.284386   5.473 4.42e-08 ***
## total_frequency            -0.004021   0.061958  -0.065    0.948    
## stim_set_c                 -0.206722   0.484411  -0.427    0.670    
## total_frequency:stim_set_c -0.008042   0.123915  -0.065    0.948    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ttl_fr stm_s_
## totl_frqncy -0.764              
## stim_set_c   0.006 -0.022       
## ttl_frqn:__ -0.019  0.023 -0.897
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Wald CIs for the fixed effects; variance-component rows are NA with method="Wald".
confint(m, method="Wald")
##                                 2.5 %    97.5 %
## .sig01                             NA        NA
## .sig02                             NA        NA
## .sig03                             NA        NA
## (Intercept)                 0.9991106 2.1138833
## total_frequency            -0.1254568 0.1174149
## stim_set_c                 -1.1561510 0.7427066
## total_frequency:stim_set_c -0.2509111 0.2348271

Model Fit for each Block

There were also no notable effects when each block was analyzed separately.

Total Frequency model results centered on Block 1:

#Block 1 alone
# Same total-frequency model, recoded so the stim_set_1 dummy places Block 1
# at the intercept (simple effects for Block 1).
m <- glmer(is_right ~total_frequency*stim_set_1+(1|subject)+(1|target_image)+(1|yoked_id),data=expS1_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ total_frequency * stim_set_1 + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: expS1_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##    577.8    608.3   -281.9    563.8      569 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9874  0.2818  0.3757  0.5044  1.0353 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  yoked_id     (Intercept) 0.62672  0.7917  
##  subject      (Intercept) 0.03518  0.1876  
##  target_image (Intercept) 0.00000  0.0000  
## Number of obs: 576, groups:  yoked_id, 36; subject, 36; target_image, 8
## 
## Fixed effects:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.660e+00  3.725e-01   4.456 8.36e-06 ***
## total_frequency            -1.907e-08  8.660e-02   0.000    1.000    
## stim_set_1                 -2.067e-01  4.844e-01  -0.427    0.670    
## total_frequency:stim_set_1 -8.042e-03  1.239e-01  -0.065    0.948    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ttl_fr stm__1
## totl_frqncy -0.814              
## stim_set_1  -0.646  0.626       
## ttl_frq:__1  0.568 -0.699 -0.897
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Wald CIs for the fixed effects; variance-component rows are NA with method="Wald".
confint(m, method="Wald")
##                                 2.5 %    97.5 %
## .sig01                             NA        NA
## .sig02                             NA        NA
## .sig03                             NA        NA
## (Intercept)                 0.9297337 2.3899828
## total_frequency            -0.1697405 0.1697405
## stim_set_1                 -1.1561496 0.7427041
## total_frequency:stim_set_1 -0.2509107 0.2348270

Total Frequency model results centered on Block 2:

#Block 2 alone
# Same total-frequency model, recoded so the stim_set_2 dummy places Block 2
# at the intercept (simple effects for Block 2).
m <- glmer(is_right ~total_frequency*stim_set_2+(1|subject)+(1|target_image)+(1|yoked_id),data=expS1_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ total_frequency * stim_set_2 + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: expS1_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##    577.8    608.3   -281.9    563.8      569 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9874  0.2818  0.3757  0.5044  1.0353 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  yoked_id     (Intercept) 0.64558  0.8035  
##  subject      (Intercept) 0.01632  0.1277  
##  target_image (Intercept) 0.00000  0.0000  
## Number of obs: 576, groups:  yoked_id, 36; subject, 36; target_image, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.453136   0.374576   3.879 0.000105 ***
## total_frequency            -0.008042   0.088628  -0.091 0.927700    
## stim_set_2                 -0.206722   0.484417  -0.427 0.669566    
## total_frequency:stim_set_2 -0.008042   0.123916  -0.065 0.948255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ttl_fr stm__2
## totl_frqncy -0.831              
## stim_set_2   0.651 -0.642       
## ttl_frq:__2 -0.594  0.715 -0.897
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Wald CIs for the fixed effects; variance-component rows are NA with method="Wald".
confint(m, method="Wald")
##                                 2.5 %    97.5 %
## .sig01                             NA        NA
## .sig02                             NA        NA
## .sig03                             NA        NA
## (Intercept)                 0.7189798 2.1872921
## total_frequency            -0.1817495 0.1656655
## stim_set_2                 -1.1561621 0.7427181
## total_frequency:stim_set_2 -0.2509135 0.2348295

Experiment 2

# Restrict the post-exclusion dataset to Experiment 2.
exp2_d <- filter(d_post_exclusion, experiment == "exp2")

Sampling Phase

Sampling Choices

Participants in the Active Condition selected items systematically during the Sampling Phase, depending on exposure frequency. On average, sampling choices scaled inversely with the frequency with which object-label items were heard during the Exposure Phase: the less often an item had been heard, the more often children sampled it.

#### first, create a data frame summarizing exposure frequencies for each subject
#collect all unique image/ audio combinations
# Pivot the four image/audio slot columns into long form (one row per
# image-audio pair per subject/block) and keep only distinct pairs; this gives
# the full inventory of items each subject could have been exposed to.
exp2_images_data_frame <- exp2_d %>%
  filter(trial_type=="exposure") %>%
  select(yoked_id,subject,condition, stim_set,image1:image4,audio1:audio4) %>%
  group_by(yoked_id,subject,condition, stim_set) %>%
  pivot_longer(
    cols = c(image1:image4,audio1:audio4),
    names_to = c(".value","stim_num"),
    names_pattern = "(image|audio)(\\d)"
  ) %>%
  distinct()
#summarize frequencies during Exposure Phase
# Count how often each image-audio pair was presented (chosen) on exposure
# trials, then relabel columns to align with the item inventory above.
exp2_exposure_frequencies <- exp2_d %>%
  filter(trial_type=="exposure") %>%
  select(yoked_id,subject,condition, stim_set,choice_image,choice_audio) %>%
  group_by(yoked_id,subject,condition, stim_set,choice_image,choice_audio) %>%
  summarize(
    exposure_frequency = n()
  ) %>%
  rename(
    image=choice_image,
    audio=choice_audio
  ) 
#join and handle images with 0 occurrences
# Items never presented get NA from the left join; recode those to 0, then bin
# the counts into the design's exposure kinds (5/2/1/0 exposures).
# NOTE(review): left_join relies on implicit key matching (all shared column
# names) — confirm the join keys are as intended.
exp2_exposure_data_frame <- exp2_images_data_frame %>%
  left_join(exp2_exposure_frequencies) %>%
  mutate(exposure_frequency=ifelse(is.na(exposure_frequency),0,exposure_frequency)) %>%
  mutate(
    exposure_kind = case_when(
      exposure_frequency == 5 ~ "high",
      exposure_frequency == 2 ~ "medium",
      exposure_frequency == 1 ~ "low",
      exposure_frequency == 0 ~ "no"
    )
  )

#choices by exposure type
# Relabel the Active-condition exposure inventory so its columns align with
# the sampling-choice columns used in the join below.
exp2_choice_data_frame <- exp2_exposure_data_frame %>%
  filter(condition=="active") %>%
  select(-stim_num) %>%
  rename(
    choice_image=image,
    choice_audio=audio,
    choice_kind=exposure_kind
  )
# Count how many times each item was chosen on sampling ("learning") trials.
exp2_choice_type <-  exp2_d %>%
  filter(condition=="active" & trial_type=="learning") %>%
  mutate(choice_kind=as.factor(choice_kind)) %>%
  group_by(subject,yoked_id,stim_set, choice_kind,choice_image) %>%
  summarize(choice_num=n())
# Join counts back onto the full inventory; items never chosen become 0.
# choice_prob divides by 3 — presumably the max number of sampling
# opportunities per item; TODO confirm against the design.
exp2_choice_type_complete <- exp2_choice_data_frame %>%
  left_join(exp2_choice_type) %>%
  mutate(choice_num=ifelse(is.na(choice_num),0,choice_num)) %>%
  mutate(choice_prob = choice_num/3)

#join in exposure and sampling
# Slim copy of the Active sampling counts keyed for joining onto the yoked
# participants' exposure data; choice_kind is renamed to mark it as the
# yoked Active partner's exposure kind.
exp2_choice_type_complete_for_join <- exp2_choice_type_complete %>%
  ungroup() %>%
  select(yoked_id,stim_set,choice_image,choice_audio,choice_kind,choice_num) %>%
  rename(yoked_active_kind=choice_kind)
# Merge each item's own exposure frequency with the yoked Active partner's
# sampling data, then express the yoked partner's kind back as a numeric
# frequency so an exposure difference can be computed.
exp2_exposure_sampling_complete <- exp2_exposure_data_frame %>%
  rename(
    choice_image=image,
    choice_audio=audio
  ) %>%
  left_join(exp2_choice_type_complete_for_join) %>%
  rename(
    target_image = choice_image,
    target_audio = choice_audio
  ) %>%
  #compute exposure difference
  mutate(
    yoked_active_exposure_frequency = case_when(
      yoked_active_kind == "high" ~ 5,
      yoked_active_kind == "medium" ~ 2,
      yoked_active_kind == "low" ~ 1,
      yoked_active_kind == "no" ~ 0
    )
  ) %>%
  # positive values = this item was heard more often than the yoked partner's
  mutate(exposure_difference = exposure_frequency -yoked_active_exposure_frequency)
  

# Within-subject-corrected means and CIs of sampling-choice counts per
# exposure kind (summarySEwithin is loaded elsewhere; presumably applies a
# Morey-style within-subject correction — confirm).
exp2_summarize_choice_type <- summarySEwithin(
  data=exp2_choice_type_complete,
  measurevar="choice_num",
  withinvars=c("choice_kind"),
  idvar="subject") %>%
  mutate(
    lower_ci = choice_num - ci,
    upper_ci = choice_num + ci)
# Same summary, additionally split by block (stim_set), with a display label.
exp2_summarize_choice_type_block <- summarySEwithin(
  data=exp2_choice_type_complete,
  measurevar="choice_num",
  withinvars=c("stim_set","choice_kind"),
  idvar="subject") %>%
  mutate(
    lower_ci = choice_num - ci,
    upper_ci = choice_num + ci) %>%
  mutate(block = ifelse(stim_set==1,"BLOCK 1","BLOCK 2"))

#table
# Print mean sampling-choice counts per exposure kind (ascending order),
# dropping the summary bookkeeping columns first.
exp2_summarize_choice_type %>%
  arrange(choice_num) %>%
  select(-N, -ciMult, -choice_num_norm, -sd, -se, -ci) %>%
  kable()
choice_kind choice_num lower_ci upper_ci
high 1.110390 0.9700902 1.250689
medium 1.454546 1.3354854 1.573605
low 1.512987 1.3716284 1.654346
no 1.922078 1.7916146 2.052541
#plot
#relevel choice kind so bars run from most- to least-exposed
exp2_summarize_choice_type$choice_kind <- factor(exp2_summarize_choice_type$choice_kind,levels = c("high","medium","low","no"))
# Mean sampling-choice counts per exposure kind, with within-subject CIs.
# Fixes: removed a duplicated theme_classic() call (it appeared both before
# and after scale_x_discrete) and spelled out TRUE instead of T.
exp2_sampling_choices_plot <- ggplot(exp2_summarize_choice_type,aes(choice_kind,choice_num, color=choice_kind, fill=choice_kind))+
  geom_bar(stat="identity",size=2,alpha=0.05)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),color="black",width=0.1)+
  theme_classic()+
  scale_x_discrete(labels=c("5 Exposures","2 Exposures","1 Exposure","0 Exposures"),limits=c("high","medium","low","no"))+
  scale_color_viridis(discrete=TRUE)+
  scale_fill_viridis(discrete=TRUE)+
  xlab("Exposure Item")+
  ylab("Average Sampling Choice Frequency")+
  theme(axis.title = element_text(size=20),
        axis.text.x  = element_text(size=13),
        axis.text.y=element_text(size=16),
        strip.text=element_text(size=16),
        legend.position="none")
exp2_sampling_choices_plot

ggsave(here(figure_path,"exp2_sampling_choices_by_exposure.png"),width=9,height=6,bg="white")

Lower Exposure Choices

Descriptives

Next, we asked whether children weighed the relative informativeness of their options in selecting between the two items on a given sampling trial. To investigate this question, we tested whether children preferentially selected the item with the lower frequency during the Exposure Phase on a given sampling trial (which always presented two options). Children preferentially selected the item with the lower frequency during the Exposure Phase.

# Per-subject proportions of lower-exposure choices on sampling trials,
# first per choice-pair type, then averaged to one value per subject; each
# computed overall and split by block.
#overall
exp2_sum_subj_choices <- exp2_d %>%
  filter(condition=="active" &trial_type=="learning") %>%
  group_by(subject,age_group,age,condition,choice_trial_type) %>%
  summarize(
    avg_lower_exposure_choice=mean(lower_exposure_choice))
##by block
exp2_sum_subj_choices_block <- exp2_d %>%
  filter(condition=="active" &trial_type=="learning") %>%
  group_by(subject,age_group,age,condition,choice_trial_type,stim_set) %>%
  summarize(
    avg_lower_exposure_choice=mean(lower_exposure_choice))

# Average across choice-pair types to get one proportion per subject
# (a mean of per-pair-type means, so pair types are weighted equally).
#overall
exp2_overall_exposure_choices_subj <- exp2_sum_subj_choices %>%
  group_by(subject,age_group,age,condition) %>%
  summarize(mean_lower_exposure_choice=mean(avg_lower_exposure_choice))
##by block
exp2_overall_exposure_choices_subj_block <- exp2_sum_subj_choices_block %>%
  group_by(subject,age_group,age,condition,stim_set) %>%
  summarize(mean_lower_exposure_choice=mean(avg_lower_exposure_choice))

## summarize
# Condition-level summaries with 95% CIs.
#within-subject-corrected
## by block
exp2_overall_exposure_choices_within_corrected_block <- summarySEwithin(filter(exp2_overall_exposure_choices_subj_block, condition=="active"),"mean_lower_exposure_choice", betweenvars=c("condition"),withinvars=c("stim_set"),idvar="subject" ) %>%
  mutate(lower_ci = mean_lower_exposure_choice - ci,upper_ci = mean_lower_exposure_choice  + ci)
#uncorrected (reported)
exp2_overall_exposure_choices <- summarySE(filter(exp2_overall_exposure_choices_subj, condition=="active"),"mean_lower_exposure_choice", groupvars=c("condition"))%>%
  mutate(lower_ci = mean_lower_exposure_choice - ci,upper_ci = mean_lower_exposure_choice  + ci)
## by block
exp2_overall_exposure_choices_block <- summarySE(filter(exp2_overall_exposure_choices_subj_block, condition=="active"),"mean_lower_exposure_choice", groupvars=c("condition","stim_set"))%>%
  mutate(lower_ci = mean_lower_exposure_choice - ci,upper_ci = mean_lower_exposure_choice  + ci)
Overall
#t-tests
#overall
# One-sample t-test of the mean lower-exposure choice proportion against
# chance (0.5). Spelled out TRUE (T is reassignable); note var.equal is
# ignored by one-sample t-tests anyway.
t.test(subset(exp2_overall_exposure_choices_subj, condition=="active")$mean_lower_exposure_choice,mu=0.5, var.equal=TRUE)
## 
##  One Sample t-test
## 
## data:  subset(exp2_overall_exposure_choices_subj, condition == "active")$mean_lower_exposure_choice
## t = 7.0745, df = 76, p-value = 6.354e-10
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5800894 0.6428543
## sample estimates:
## mean of x 
## 0.6114719
By Block
Block 1
#Block 1
# One-sample t-test vs. chance (0.5), Block 1 only. TRUE spelled out
# (var.equal is ignored by one-sample t-tests).
t.test(filter(exp2_overall_exposure_choices_subj_block, condition=="active"&stim_set==1)$mean_lower_exposure_choice,mu=0.5, var.equal=TRUE)
## 
##  One Sample t-test
## 
## data:  filter(exp2_overall_exposure_choices_subj_block, condition == "active" & stim_set == 1)$mean_lower_exposure_choice
## t = 5.5701, df = 76, p-value = 3.687e-07
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5695273 0.6469230
## sample estimates:
## mean of x 
## 0.6082251
Block 2
#Block 2
# One-sample t-test vs. chance (0.5), Block 2 only. TRUE spelled out
# (var.equal is ignored by one-sample t-tests).
t.test(filter(exp2_overall_exposure_choices_subj_block, condition=="active"&stim_set==2)$mean_lower_exposure_choice,mu=0.5, var.equal=TRUE)
## 
##  One Sample t-test
## 
## data:  filter(exp2_overall_exposure_choices_subj_block, condition == "active" & stim_set == 2)$mean_lower_exposure_choice
## t = 5.0086, df = 76, p-value = 3.466e-06
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5691009 0.6603364
## sample estimates:
## mean of x 
## 0.6147186

Age Effects

# Simple linear regression: does age (continuous) predict the proportion of
# lower-exposure choices in the Active condition?
m <- lm(mean_lower_exposure_choice~ age,subset(exp2_overall_exposure_choices_subj, condition=="active"))
summary(m)
## 
## Call:
## lm(formula = mean_lower_exposure_choice ~ age, data = subset(exp2_overall_exposure_choices_subj, 
##     condition == "active"))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26698 -0.07077 -0.01283  0.08882  0.35597 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.33218    0.08498   3.909 0.000202 ***
## age          0.06105    0.01829   3.338 0.001318 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1299 on 75 degrees of freedom
## Multiple R-squared:  0.1293, Adjusted R-squared:  0.1177 
## F-statistic: 11.14 on 1 and 75 DF,  p-value: 0.001318
# Tidy the age-model coefficients, attach residual df and 95% CIs.
# Improvement: confint(m) is computed once and reused (it was previously
# called separately for each CI column).
model_estimates_exp2_age <- broom::tidy(m) %>%
  mutate(df=m$df.residual)
exp2_age_ci <- confint(m)
model_estimates_exp2_age$lower_ci <- exp2_age_ci[,1]
model_estimates_exp2_age$upper_ci <- exp2_age_ci[,2]

We also investigated whether age (treated as a continuous variable) predicted children’s mean proportion of lower frequency items selected in a simple linear regression model, finding that the proportion of lower frequency choices increased with age, b = 0.06, 95% Wald CI = [0.02, 0.1], t(75) = 3.34, p = 0.0013.

Below, we show a plot of this effect.

# Scatterplot of per-subject lower-exposure choice proportion by age, with a
# linear fit; the dashed line marks chance (0.5).
ggplot(subset(exp2_overall_exposure_choices_subj, condition=="active"),
       aes(age,mean_lower_exposure_choice))+
  geom_hline(yintercept = 0.5,linetype="dashed")+
  geom_point()+
  geom_smooth(method="lm")+
  xlab("Age (in years)")+
  ylab("Proportion Lower Exposure Choices")

ggsave(here(figure_path,"exp2_lower_exposure_choices_across_age.png"),width=9,height=6,bg = "white")
3-year-olds
# 3-year-olds: one-sample t-test vs. chance (0.5); TRUE spelled out
# (var.equal is ignored by one-sample t-tests).
t.test(subset(exp2_overall_exposure_choices_subj, condition=="active"&age_group=="three-year-olds")$mean_lower_exposure_choice,mu=0.5, var.equal=TRUE)
## 
##  One Sample t-test
## 
## data:  subset(exp2_overall_exposure_choices_subj, condition == "active" & age_group == "three-year-olds")$mean_lower_exposure_choice
## t = 2.037, df = 21, p-value = 0.05446
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.4988895 0.6071711
## sample estimates:
## mean of x 
## 0.5530303
4-year-olds
# 4-year-olds: one-sample t-test vs. chance (0.5); TRUE spelled out
# (var.equal is ignored by one-sample t-tests).
t.test(subset(exp2_overall_exposure_choices_subj, condition=="active"&age_group=="four-year-olds")$mean_lower_exposure_choice,mu=0.5, var.equal=TRUE)
## 
##  One Sample t-test
## 
## data:  subset(exp2_overall_exposure_choices_subj, condition == "active" & age_group == "four-year-olds")$mean_lower_exposure_choice
## t = 3.733, df = 24, p-value = 0.001031
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.5447125 0.6552875
## sample estimates:
## mean of x 
##       0.6
5-year-olds
# 5-year-olds: one-sample t-test vs. chance (0.5); TRUE spelled out
# (var.equal is ignored by one-sample t-tests).
t.test(subset(exp2_overall_exposure_choices_subj, condition=="active"&age_group=="five-year-olds")$mean_lower_exposure_choice,mu=0.5, var.equal=TRUE)
## 
##  One Sample t-test
## 
## data:  subset(exp2_overall_exposure_choices_subj, condition == "active" & age_group == "five-year-olds")$mean_lower_exposure_choice
## t = 6.5278, df = 29, p-value = 3.785e-07
## alternative hypothesis: true mean is not equal to 0.5
## 95 percent confidence interval:
##  0.6125409 0.7152368
## sample estimates:
## mean of x 
## 0.6638889

Robustness Check

Robustness Check: Model-Based approach with logistic mixed-effects models

We obtain similar effects when fitting a trial-by-trial logistic mixed-effects model predicting whether participants chose the lower-exposure item.

Overall
#overall
# Intercept-only logistic mixed model: is the log-odds of choosing the
# lower-exposure item above 0 (i.e., above chance), with random intercepts
# for the two images available on each sampling trial?
m <- glmer(lower_exposure_choice~1+(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ 1 + (1 | available_image1) + (1 | available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1240.6   1255.1   -617.3   1234.6      921 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2729 -1.2482  0.7894  0.8002  0.8071 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0.000000 0.00000 
##  available_image2 (Intercept) 0.003433 0.05859 
## Number of obs: 924, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.45403    0.07072    6.42 1.36e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Block 1
# Same intercept-only model restricted to Block 1 (stim_set 1).
m <- glmer(lower_exposure_choice~1+(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"&stim_set==1), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ 1 + (1 | available_image1) + (1 | available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning" &  
##     stim_set == 1)
## 
##      AIC      BIC   logLik deviance df.resid 
##    624.7    637.1   -309.3    618.7      459 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2460 -1.2460  0.8026  0.8026  0.8026 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0        0       
##  available_image2 (Intercept) 0        0       
## Number of obs: 462, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.43986    0.09531   4.615 3.93e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Block 2
#stimSet 2
# Same intercept-only model restricted to Block 2 (stim_set 2).
m <- glmer(lower_exposure_choice~1+(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"&stim_set==2), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ 1 + (1 | available_image1) + (1 | available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning" &  
##     stim_set == 2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    621.3    633.7   -307.7    615.3      459 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3350 -1.2249  0.7499  0.7799  0.8764 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0.00000  0.0000  
##  available_image2 (Intercept) 0.03631  0.1906  
## Number of obs: 462, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.4712     0.1179   3.997 6.42e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Age

The age effect also holds in this logistic mixed-effects model.

#center age
# Center age so the model intercept reflects performance at the mean age.
# Fix: TRUE spelled out instead of T.
# NOTE(review): the mean is taken over all of exp2_d (both conditions) before
# the model subsets to the Active condition — confirm this is intended.
exp2_d <- exp2_d %>%
  mutate(age_c = age - mean(age,na.rm=TRUE))

#overall
# Trial-level logistic mixed model: does (centered) age predict choosing the
# lower-exposure item?
m <- glmer(lower_exposure_choice~age_c+(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: lower_exposure_choice ~ age_c + (1 | available_image1) + (1 |  
##     available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1233.1   1252.4   -612.5   1225.1      920 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5163 -1.1834  0.7108  0.8093  0.9859 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0.000000 0.00000 
##  available_image2 (Intercept) 0.003198 0.05655 
## Number of obs: 924, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  0.46525    0.07101   6.552 5.69e-11 ***
## age_c        0.25790    0.08403   3.069  0.00215 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr)
## age_c 0.073 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
Age Effect within each age group
3-year-olds
# Intercept-only model for 3-year-olds only.
m <- glmer(lower_exposure_choice~(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"& age_group=="three-year-olds"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ (1 | available_image1) + (1 | available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning" &  
##     age_group == "three-year-olds")
## 
##      AIC      BIC   logLik deviance df.resid 
##    369.0    379.7   -181.5    363.0      261 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.1276 -1.1036  0.8868  0.8925  0.9158 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0.000000 0.00000 
##  available_image2 (Intercept) 0.008068 0.08982 
## Number of obs: 264, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   0.2143     0.1284   1.669   0.0951 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Type III Wald chi-square test of the intercept (matches the z-test above).
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: lower_exposure_choice
##              Chisq Df Pr(>Chisq)  
## (Intercept) 2.7856  1    0.09511 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4-year-olds
# Intercept-only model for 4-year-olds only.
m <- glmer(lower_exposure_choice~(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"& age_group=="four-year-olds"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ (1 | available_image1) + (1 | available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning" &  
##     age_group == "four-year-olds")
## 
##      AIC      BIC   logLik deviance df.resid 
##    409.8    420.9   -201.9    403.8      297 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.2247 -1.2247  0.8165  0.8165  0.8165 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0        0       
##  available_image2 (Intercept) 0        0       
## Number of obs: 300, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.4055     0.1179    3.44 0.000581 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Type III Wald chi-square test of the intercept (matches the z-test above).
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: lower_exposure_choice
##              Chisq Df Pr(>Chisq)    
## (Intercept) 11.837  1  0.0005807 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
5-year-olds
# Intercept-only model for 5-year-olds only.
m <- glmer(lower_exposure_choice~(1|available_image1)+(1|available_image2),data=subset(exp2_d,condition=="active" & trial_type=="learning"& age_group=="five-year-olds"), family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## lower_exposure_choice ~ (1 | available_image1) + (1 | available_image2)
##    Data: subset(exp2_d, condition == "active" & trial_type == "learning" &  
##     age_group == "five-year-olds")
## 
##      AIC      BIC   logLik deviance df.resid 
##    465.6    477.3   -229.8    459.6      357 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.4227 -1.3901  0.7029  0.7118  0.7247 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  available_image1 (Intercept) 0.000000 0.00000 
##  available_image2 (Intercept) 0.006739 0.08209 
## Number of obs: 360, groups:  available_image1, 8; available_image2, 8
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   0.6812     0.1155   5.898 3.67e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Type III Wald chi-square test of the intercept (matches the z-test above).
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: lower_exposure_choice
##              Chisq Df Pr(>Chisq)    
## (Intercept) 34.792  1  3.669e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot

#proportion choices for different choice pairs
# Condition-level mean lower-exposure choice probability for each choice-pair
# type, with t-based 95% CIs, plus labels identifying the higher- and
# lower-frequency item in each pair (used for plot facets/axes).
exp2_sum_choices <- exp2_sum_subj_choices %>%
  group_by(condition,choice_trial_type) %>%
  summarize(
    N=n(),
    lower_exposure_choice_prob=mean(avg_lower_exposure_choice),
    sd_le = sd(avg_lower_exposure_choice,na.rm=TRUE),
    ci = qt(0.975, N-1)*sd_le/sqrt(N),
    lower_ci = lower_exposure_choice_prob - ci,
    upper_ci = lower_exposure_choice_prob + ci,
    ) %>%
  mutate(
    highest_freq = case_when(
      choice_trial_type %in% c("high_low","high_no","high_medium") ~ "high",
      choice_trial_type %in% c("low_medium","medium_no") ~ "medium",
      choice_trial_type %in% c("low_no") ~ "low",
      TRUE ~ NA_character_),
    low_freq = case_when(
      choice_trial_type %in% c("low_no","high_no","medium_no") ~ "no",
      choice_trial_type %in% c("low_medium","high_low") ~ "low",
      choice_trial_type %in% c("high_medium") ~ "medium",
      TRUE ~ NA_character_)
    ) %>%
  mutate(
    highest_freq_f = factor(highest_freq, levels=c("high","medium","low"), labels=c("5 Exposures","2 Exposures","1 Exposure"))
  )
##by block
# Same choice-pair summary split by block, with binomial CIs.
# NOTE(review): binom.test() requires a non-negative integer count, but
# sum(avg_lower_exposure_choice) sums per-subject *proportions* and need not
# be an integer — verify this computes the intended interval.
exp2_sum_choices_block <- exp2_sum_subj_choices_block %>%
  group_by(condition,choice_trial_type,stim_set) %>%
  summarize(
    N=n(),
    lower_exposure_choice_prob=mean(avg_lower_exposure_choice),
    ci.lower=binom.test(sum(avg_lower_exposure_choice),N)$conf.int[1],
    ci.upper=binom.test(sum(avg_lower_exposure_choice),N)$conf.int[2]) %>%
  mutate(
    highest_freq = case_when(
      choice_trial_type %in% c("high_low","high_no","high_medium") ~ "high",
      choice_trial_type %in% c("low_medium","medium_no") ~ "medium",
      choice_trial_type %in% c("low_no") ~ "low",
      TRUE ~ NA_character_),
    low_freq = case_when(
      choice_trial_type %in% c("low_no","high_no","medium_no") ~ "no",
      choice_trial_type %in% c("low_medium","high_low") ~ "low",
      choice_trial_type %in% c("high_medium") ~ "medium",
      TRUE ~ NA_character_)
    ) %>%
  mutate(
    highest_freq_f = factor(highest_freq, levels=c("high","medium","low"), labels=c("5 Exposures","2 Exposures","1 Exposure"))
  ) %>%
  mutate(
    block = ifelse(stim_set==1,"BLOCK 1","BLOCK 2")
  )
Overall
#plot: probability of choosing the lower-exposure item in each pair
#(active condition, collapsed across blocks)
exp2_sampling_pairs <- ggplot(subset(exp2_sum_choices,condition=="active"),aes(low_freq,lower_exposure_choice_prob,color=low_freq,fill=low_freq))+
  # fixed: the original had a stray empty argument (",,") in this call
  geom_bar(stat="identity",size=2,alpha=0.05)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0)+
  theme_classic()+
  # x axis ordered by decreasing exposure count of the lower-exposure item
  scale_x_discrete(labels=c("2","1","0"),limits=c("medium","low","no"))+
  ylab("Lower Exposure Choice Probability")+
  xlab("Lower Exposure Item")+
  geom_hline(yintercept=.5,linetype="dotted")+ # chance level
  facet_wrap(~highest_freq_f)+
  ylim(0,1)+
  #scale_color_viridis(discrete=T)+
  scale_color_manual(limits = c("medium","low","no"),values=c("#31688EFF", "#35B779FF", "#FDE725FF"))+
  #scale_fill_viridis(discrete=T)+
  scale_fill_manual(limits = c("medium","low","no"),values=c("#31688EFF", "#35B779FF", "#FDE725FF"))+
  theme(axis.title = element_text(size=20),
        axis.text.x  = element_text(size=16),
        axis.text.y=element_text(size=16),
        strip.text=element_text(size=16),
        legend.position="none")
exp2_sampling_pairs

# ggsave with no plot argument saves the last displayed plot
ggsave(here(figure_path,"exp2_sampling_lowerExposureChoices.png"),width=11,height=7,bg="white")
Combined Sampling Choices and Exposure Pairs Figure
# combine the sampling-choices and exposure-pairs panels into one two-panel figure
plot_grid(exp2_sampling_choices_plot,exp2_sampling_pairs, labels=c("A","B"))

# ggsave with no plot argument saves the last displayed (combined) plot
ggsave(here(figure_path,"exp2_sampling_all.png"),width=11,height=7,bg="white")
Split By Block
# Block 1 panel: lower-exposure choice probability by pair type (active condition)
p1 <- ggplot(subset(exp2_sum_choices_block,condition=="active"&block=="BLOCK 1"),aes(low_freq,lower_exposure_choice_prob,color=low_freq,fill=low_freq))+
  # NOTE(review): fill="white" overrides the fill aesthetic mapped above, so
  # bars are color-outlined but white-filled -- presumably intentional; confirm
  geom_bar(stat="identity",size=2,fill="white")+
  geom_errorbar(aes(ymin=ci.lower,ymax=ci.upper),width=0)+
  theme_classic()+
  # x axis ordered by decreasing exposure count of the lower-exposure item
  scale_x_discrete(labels=c("2 Exposures","1 Exposure","0 Exposures"),limits=c("medium","low","no"))+
  ylab("Lower Exposure Choice Probability")+
  xlab("Lower Exposure Item")+
  geom_hline(yintercept=.5,linetype="dotted")+
  facet_wrap(~highest_freq_f)+
  ylim(0,1)+
  #scale_color_viridis(discrete=T)+
    scale_color_manual(limits = c("medium","low","no"),values=c("#31688EFF", "#35B779FF", "#FDE725FF"))+
  #scale_fill_viridis(discrete=T)+
  scale_fill_manual(limits = c("medium","low","no"),values=c("#31688EFF", "#35B779FF", "#FDE725FF"))+
  theme(axis.title = element_text(size=20),
        axis.text.x  = element_text(angle=90,vjust=0.5,size=16),
        axis.text.y=element_text(size=16),
        strip.text=element_text(size=16),
        legend.position="none")

# Block 2 panel: same layout as the Block 1 panel
p2 <- ggplot(subset(exp2_sum_choices_block,condition=="active"&block=="BLOCK 2"),aes(low_freq,lower_exposure_choice_prob,color=low_freq,fill=low_freq))+
  # NOTE(review): fill="white" overrides the fill aesthetic mapped above, so
  # bars are color-outlined but white-filled -- presumably intentional; confirm
  geom_bar(stat="identity",size=2,fill="white")+
  geom_errorbar(aes(ymin=ci.lower,ymax=ci.upper),width=0)+
  theme_classic()+
  # x axis ordered by decreasing exposure count of the lower-exposure item
  scale_x_discrete(labels=c("2 Exposures","1 Exposure","0 Exposures"),limits=c("medium","low","no"))+
  ylab("Lower Exposure Choice Probability")+
  xlab("Lower Exposure Item")+
  geom_hline(yintercept=.5,linetype="dotted")+
  facet_wrap(~highest_freq_f)+
  ylim(0,1)+
  #scale_color_viridis(discrete=T)+
    scale_color_manual(limits = c("medium","low","no"),values=c("#31688EFF", "#35B779FF", "#FDE725FF"))+
  #scale_fill_viridis(discrete=T)+
  scale_fill_manual(limits = c("medium","low","no"),values=c("#31688EFF", "#35B779FF", "#FDE725FF"))+
  theme(axis.title = element_text(size=20),
        axis.text.x  = element_text(angle=90,vjust=0.5,size=16),
        axis.text.y=element_text(size=16),
        strip.text=element_text(size=16),
        legend.position="none")
# side-by-side block panels
plot_grid(p1,p2, labels=c("A","B"))

# fixed: added bg="white" for consistency with every other ggsave in this script
ggsave(here(figure_path,"exp2_sampling_lowerExposureChoices_by_block.png"),width=11,height=7,bg="white")

Informativeness Difference Analysis

To test whether children were sensitive to differences in informativeness between object-label associations, we fit a logistic mixed-effects model predicting children’s likelihood of choosing a given item from its difference in informativeness relative to the alternative option (collapsing across blocks). We included random intercepts for participant and for each item option. We found that children’s likelihood of choosing an item increased as it became more informative relative to the alternative item presented on a given trial.

Model

#more complex model (singular fit)
#m=glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1+cur_exp_log_rel_freq_diff12|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active" & trial_type=="learning"),family=binomial)
# simplified model: predict choice of image 1 from the informativeness
# difference, with random intercepts for subject and for each image option
m_inf_exp2 <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active" &trial_type=="learning"),family=binomial)
summary(m_inf_exp2)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1190.8   1214.9   -590.4   1180.8      919 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0027 -0.8345  0.4607  0.8211  2.0221 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  subject          (Intercept) 0.0000000 0.00000 
##  available_image1 (Intercept) 0.0193345 0.13905 
##  available_image2 (Intercept) 0.0003426 0.01851 
## Number of obs: 924, groups:  
## subject, 77; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.03857    0.08563   0.450    0.652    
## cur_exp_log_rel_freq_diff12  0.40577    0.04347   9.335   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.032 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m_inf_exp2, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.1292737 0.2064046
## cur_exp_log_rel_freq_diff12  0.3205769 0.4909577

Next, we conducted a series of robustness analyses to assess the sensitivity of the results.

Robustness: Interaction with Block
# robustness: does the informativeness effect interact with block (stim_set_c)?
# fixed: use <- for assignment, consistent with the rest of the script
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12*stim_set_c+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active" &trial_type=="learning"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 * stim_set_c +  
##     (1 | subject) + (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1193.5   1227.3   -589.7   1179.5      917 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1121 -0.8358  0.4325  0.8303  2.1289 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  subject          (Intercept) 0.0000000 0.00000 
##  available_image1 (Intercept) 0.0196187 0.14007 
##  available_image2 (Intercept) 0.0001295 0.01138 
## Number of obs: 924, groups:  
## subject, 77; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                             0.03913    0.08573   0.456    0.648    
## cur_exp_log_rel_freq_diff12             0.40800    0.04362   9.354   <2e-16 ***
## stim_set_c                             -0.01549    0.14117  -0.110    0.913    
## cur_exp_log_rel_freq_diff12:stim_set_c  0.09578    0.08581   1.116    0.264    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cr_____12 stm_s_
## cr_xp____12  0.032                 
## stim_set_c   0.019 -0.001          
## c_____12:__  0.007  0.070     0.045
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:7,]
##                                              2.5 %    97.5 %
## (Intercept)                            -0.12890209 0.2071708
## cur_exp_log_rel_freq_diff12             0.32250973 0.4934985
## stim_set_c                             -0.29216867 0.2611896
## cur_exp_log_rel_freq_diff12:stim_set_c -0.07239383 0.2639585
Robustness: Block 1 alone
# robustness: informativeness effect within Block 1 (stim_set == 1) only
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active" &trial_type=="learning"&stim_set==1),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning" &  
##     stim_set == 1)
## 
##      AIC      BIC   logLik deviance df.resid 
##    608.9    629.5   -299.4    598.9      457 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8118 -0.8608  0.5005  0.8464  1.9090 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 3.860e-09 6.213e-05
##  available_image1 (Intercept) 1.058e-02 1.028e-01
##  available_image2 (Intercept) 3.137e-02 1.771e-01
## Number of obs: 462, groups:  
## subject, 77; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.05558    0.12243   0.454     0.65    
## cur_exp_log_rel_freq_diff12  0.36449    0.05992   6.083 1.18e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.038 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.1843682 0.2955305
## cur_exp_log_rel_freq_diff12  0.2470446 0.4819427
Robustness: Block 2 alone
# robustness: informativeness effect within Block 2 (stim_set == 2) only
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active" &trial_type=="learning"&stim_set==2),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning" &  
##     stim_set == 2)
## 
##      AIC      BIC   logLik deviance df.resid 
##    587.4    608.0   -288.7    577.4      457 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0474 -0.8092 -0.0329  0.8253  2.3239 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 1.541e-10 1.241e-05
##  available_image1 (Intercept) 1.093e-01 3.306e-01
##  available_image2 (Intercept) 1.324e-08 1.151e-04
## Number of obs: 462, groups:  
## subject, 77; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.04922    0.15521   0.317    0.751    
## cur_exp_log_rel_freq_diff12  0.46918    0.06478   7.242 4.41e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.050 
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.2549939 0.3534295
## cur_exp_log_rel_freq_diff12  0.3422046 0.5961479
Interaction with age

Consistent with the age effect found for sampling lower exposure frequency items, there was a significant interaction with age.

# interaction of informativeness with (centered) age; bobyqa optimizer used here.
# fixed: named the control argument -- the original passed glmerControl(...)
# positionally, which only matched `control` because data and family were named
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12*age_c+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active"&trial_type=="learning"),control=glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 * age_c + (1 |  
##     subject) + (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1177.3   1211.1   -581.7   1163.3      917 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4455 -0.8766  0.3673  0.8504  2.4141 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev. 
##  subject          (Intercept) 3.863e-16 1.965e-08
##  available_image1 (Intercept) 2.508e-02 1.584e-01
##  available_image2 (Intercept) 0.000e+00 0.000e+00
## Number of obs: 924, groups:  
## subject, 77; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                        0.05508    0.09043   0.609    0.542    
## cur_exp_log_rel_freq_diff12        0.43475    0.04502   9.657  < 2e-16 ***
## age_c                             -0.02902    0.08736  -0.332    0.740    
## cur_exp_log_rel_freq_diff12:age_c  0.21913    0.05390   4.066 4.79e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cr_____12 age_c
## cr_xp____12 0.038                 
## age_c       0.088  0.070          
## cr_____12:_ 0.052  0.249     0.080
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:7,]
##                                        2.5 %    97.5 %
## (Intercept)                       -0.1221659 0.2323287
## cur_exp_log_rel_freq_diff12        0.3465115 0.5229789
## age_c                             -0.2002374 0.1421978
## cur_exp_log_rel_freq_diff12:age_c  0.1134891 0.3247717

However, each age group showed a significant effect of informativeness when considered alone, suggesting that children sampled more informative object-label associations across the full age range, with the sampling choices of older children showing a stronger effect of informativeness.

3-year-olds
# informativeness effect within 3-year-olds alone.
# fixed: named the control argument -- the original passed glmerControl(...)
# positionally, which only matched `control` because data and family were named
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active"&trial_type=="learning"&age_group=="three-year-olds"),control=glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning" &  
##     age_group == "three-year-olds")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    365.1    383.0   -177.5    355.1      259 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.5061 -0.9305  0.6611  0.8723  1.3947 
## 
## Random effects:
##  Groups           Name        Variance  Std.Dev.
##  subject          (Intercept) 0.0013436 0.03666 
##  available_image1 (Intercept) 0.0007534 0.02745 
##  available_image2 (Intercept) 0.0000000 0.00000 
## Number of obs: 264, groups:  
## subject, 22; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                  0.07696    0.12658   0.608  0.54316   
## cur_exp_log_rel_freq_diff12  0.22467    0.07175   3.131  0.00174 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 -0.026
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:5,]
##                                   2.5 %    97.5 %
## (Intercept)                 -0.17112458 0.3250522
## cur_exp_log_rel_freq_diff12  0.08405062 0.3652922
4-year-olds
# informativeness effect within 4-year-olds alone.
# fixed: named the control argument -- the original passed glmerControl(...)
# positionally, which only matched `control` because data and family were named
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active"&trial_type=="learning"&age_group=="four-year-olds"),control=glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning" &  
##     age_group == "four-year-olds")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    397.1    415.7   -193.6    387.1      295 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.8948 -0.8771  0.5278  0.8200  1.9574 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0        0       
##  available_image1 (Intercept) 0        0       
##  available_image2 (Intercept) 0        0       
## Number of obs: 300, groups:  
## subject, 25; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.01738    0.12139   0.143    0.886    
## cur_exp_log_rel_freq_diff12  0.37955    0.07453   5.093 3.53e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 -0.050
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.2205399 0.2552942
## cur_exp_log_rel_freq_diff12  0.2334812 0.5256158
5-year-olds
# informativeness effect within 5-year-olds alone.
# fixed: named the control argument -- the original passed glmerControl(...)
# positionally, which only matched `control` because data and family were named
m <- glmer(is_image1_choice~cur_exp_log_rel_freq_diff12+(1|subject)+(1|available_image1)+(1|available_image2),data=filter(exp2_d,condition=="active"&trial_type=="learning"&age_group=="five-year-olds"),control=glmerControl(optimizer="bobyqa"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_image1_choice ~ cur_exp_log_rel_freq_diff12 + (1 | subject) +  
##     (1 | available_image1) + (1 | available_image2)
##    Data: filter(exp2_d, condition == "active" & trial_type == "learning" &  
##     age_group == "five-year-olds")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##    435.6    455.0   -212.8    425.6      355 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8690 -0.7291 -0.3585  0.7283  2.4281 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  subject          (Intercept) 0.00000  0.0000  
##  available_image1 (Intercept) 0.04395  0.2096  
##  available_image2 (Intercept) 0.00000  0.0000  
## Number of obs: 360, groups:  
## subject, 30; available_image1, 8; available_image2, 8
## 
## Fixed effects:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.07902    0.14052   0.562    0.574    
## cur_exp_log_rel_freq_diff12  0.61133    0.08058   7.586 3.29e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## cr_xp____12 0.127 
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
confint(m, method="Wald")[4:5,]
##                                  2.5 %    97.5 %
## (Intercept)                 -0.1963968 0.3544421
## cur_exp_log_rel_freq_diff12  0.4533927 0.7692732

Plot

# Build a grid of informativeness-difference values spanning the observed
# range in the active condition, then attach model predictions with SEs.
exp2_inf_pX <- data.frame(
  cur_exp_log_rel_freq_diff12 = seq(
    min(filter(exp2_d, condition == "active")$cur_exp_log_rel_freq_diff12, na.rm = TRUE),
    max(filter(exp2_d, condition == "active")$cur_exp_log_rel_freq_diff12, na.rm = TRUE),
    by = 0.1
  )
)
# population-level predictions (random effects excluded via re.form = NA)
exp2_inf_pY <- predictSE(m_inf_exp2, exp2_inf_pX, re.form = NA, type = "response")
exp2_inf_pX <- exp2_inf_pX %>%
  mutate(
    fit = exp2_inf_pY$fit,
    se.fit = exp2_inf_pY$se.fit,
    is_image1_choice = fit,   # named to match the plot's y aesthetic
    y_lower = fit - se.fit,   # +/- 1 SE ribbon bounds
    y_upper = fit + se.fit
  )

#helper image data
#image_data <- data.frame(x=c(-2,2),y=c(0.15,0.15),image_path = c(here("helper","informativeness_helper_fig_4_1.png"),here("helper","informativeness_helper_fig_1_4.png")))

#create plot: model-predicted choice curve over the raw 0/1 choice data
exp2_informativeness <- ggplot(exp2_inf_pX,aes(cur_exp_log_rel_freq_diff12,is_image1_choice))+
  geom_vline(xintercept=0,linetype="solid")+
  #geom_image(data=image_data,aes(x=x,y=y,image=image_path),size=0.2)+
  # horizontal violins of the raw binary choices (presumably ggstance's
  # geom_violinh -- confirm package), scaled by count
  geom_violinh(data=filter(exp2_d,condition=="active"&trial_type=="learning"),aes(y=is_image1_choice,group=is_image1_choice),scale="count",width=0.1, trim=T)+
  geom_jitter(data=filter(exp2_d,condition=="active"&trial_type=="learning"),aes(y=is_image1_choice,group=is_image1_choice),height=0.01)+
  # model fit with +/- 1 SE ribbon, precomputed in exp2_inf_pX above
  geom_smooth(aes(ymin=y_lower,ymax=y_upper),stat="identity")+
  geom_hline(yintercept=0.5,linetype="dotted")+
  coord_cartesian(xlim = c(-4,4), ylim = c(-0.05,1.05))+
  scale_x_continuous(breaks=seq(-4,4,1))+
  theme_classic(base_size=14)+
  xlab("Informativeness Difference Image 1 - Image 2")+
  ylab("Probability of Choosing Image 1")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))
exp2_informativeness

# ggsave with no plot argument saves the last displayed plot
ggsave(here(figure_path,"exp2_informativeness_analysis.png"),width=9,height=6,bg="white")

Test Phase

Overall Accuracy

# Per-subject mean test accuracy within each condition and block
exp2_test_subj <- exp2_d %>%
  filter(trial_type == "test") %>%
  group_by(subject, condition, stim_set) %>%
  summarize(mean_accuracy = mean(is_right, na.rm = TRUE)) %>%
  mutate(block = ifelse(stim_set == 1, "BLOCK 1", "BLOCK 2"))

# condition-by-block summary of test accuracy with 95% t-based CIs
exp2_test_summarized <- exp2_test_subj %>%
  group_by(condition, stim_set, block) %>%
  summarize(
    N = n(),
    avg_accuracy = mean(mean_accuracy),
    # half-width of the 95% confidence interval
    avg_accuracy_ci = qt(0.975, N - 1) * sd(mean_accuracy, na.rm = TRUE) / sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    # fixed: removed trailing comma (empty argument) that the original relied
    # on dplyr to tolerate
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )
# formatted summary table of test accuracy by block and condition
exp2_test_summarized %>%
  ungroup() %>%
  arrange(block) %>%
  select(-avg_accuracy_ci,-stim_set) %>%
  mutate(
    # render the CI as a "[lower, upper]" display string
    ci = str_c("[",round(avg_accuracy_lower_ci,3),", ", round(avg_accuracy_upper_ci,3),"]")) %>%
  select(block,condition,N,avg_accuracy,ci) %>%
  kable(col.names=c("Block","Condition", "N", "Average Accuracy","CI"),digits=3)
Block Condition N Average Accuracy CI
BLOCK 1 active 77 0.849 [0.809, 0.889]
BLOCK 1 passive 77 0.818 [0.775, 0.861]
BLOCK 1 passive_mismatched 77 0.782 [0.734, 0.831]
BLOCK 2 active 77 0.789 [0.737, 0.841]
BLOCK 2 passive 77 0.742 [0.688, 0.796]
BLOCK 2 passive_mismatched 77 0.800 [0.756, 0.845]

Main Model

To test for differences in word learning across conditions, we fit a logistic mixed-effects model predicting participants’ trial-by-trial accuracy from condition (dummy coded), experiment block (centered; Block 1 = -0.5, Block 2 = 0.5), and their interaction. We included random intercepts for participants, items, and yoked pairings (i.e., observations from participants who were yoked together were treated as non-independent).

#model 1 non-converging
#m <- glmer(is_right~condition_c*stim_set_c+(1+stim_set_c|subject)+(1|target_image)+(1+stim_set_c|yoked_id), subset(exp2_d, trial_type=="test"),family="binomial")
# main test-phase model: accuracy ~ condition x centered block, with random
# intercepts for subject, item, and yoked pairing
m <- glmer(is_right~condition*stim_set_c+(1|subject)+(1|target_image)+(1|yoked_id), subset(exp2_d, trial_type=="test"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## is_right ~ condition * stim_set_c + (1 | subject) + (1 | target_image) +  
##     (1 | yoked_id)
##    Data: subset(exp2_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   3564.2   3620.1  -1773.1   3546.2     3687 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4911  0.2564  0.3696  0.5170  1.2929 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.779079 0.88265 
##  yoked_id     (Intercept) 0.021254 0.14579 
##  target_image (Intercept) 0.003704 0.06086 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                             1.77461    0.13577  13.071  < 2e-16 ***
## conditionpassive                       -0.29039    0.18177  -1.598  0.11015    
## conditionpassive_mismatched            -0.23998    0.18198  -1.319  0.18724    
## stim_set_c                             -0.45894    0.15626  -2.937  0.00331 ** 
## conditionpassive:stim_set_c            -0.04419    0.21338  -0.207  0.83592    
## conditionpassive_mismatched:stim_set_c  0.57977    0.21411   2.708  0.00677 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnp cndtn_ stm_s_ cnd:__
## conditnpssv -0.696                            
## cndtnpssv_m -0.696  0.516                     
## stim_set_c  -0.079  0.056  0.056              
## cndtnpss:__  0.054 -0.074 -0.040 -0.731       
## cndtnps_:__  0.059 -0.041 -0.033 -0.730  0.534
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: is_right
##                         Chisq Df Pr(>Chisq)    
## (Intercept)          170.8493  1  < 2.2e-16 ***
## condition              2.8853  2   0.236305    
## stim_set_c             8.6255  1   0.003315 ** 
## condition:stim_set_c  11.1502  2   0.003791 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We found a significant condition by experiment block interaction. To investigate the source of the interaction, we estimated the simple effects of condition for each block by re-centering experiment block within the logistic mixed-effects model [note that the sign for coefficient estimates below is flipped in the paper for clarity].

Block 1

# simple effects of condition within Block 1: stim_set1 presumably codes
# Block 1 as 0 (defined earlier, outside this chunk) -- confirm coding upstream
m <- glmer(is_right~condition*stim_set1+(1|subject)+(1|target_image)+(1|yoked_id), subset(exp2_d, trial_type=="test"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: 
## is_right ~ condition * stim_set1 + (1 | subject) + (1 | target_image) +  
##     (1 | yoked_id)
##    Data: subset(exp2_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   3564.2   3620.1  -1773.1   3546.2     3687 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4912  0.2564  0.3696  0.5170  1.2929 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.779089 0.88266 
##  yoked_id     (Intercept) 0.021261 0.14581 
##  target_image (Intercept) 0.003703 0.06086 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                            2.00412    0.16193  12.376  < 2e-16 ***
## conditionpassive                      -0.26832    0.21747  -1.234  0.21727    
## conditionpassive_mismatched           -0.52994    0.21412  -2.475  0.01333 *  
## stim_set1                             -0.45896    0.15627  -2.937  0.00331 ** 
## conditionpassive:stim_set1            -0.04418    0.21339  -0.207  0.83599    
## conditionpassive_mismatched:stim_set1  0.57983    0.21412   2.708  0.00677 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnp cndtn_ stm_s1 cnd:_1
## conditnpssv -0.705                            
## cndtnpssv_m -0.720  0.531                     
## stim_set1   -0.549  0.405  0.412              
## cndtnpss:_1  0.398 -0.552 -0.301 -0.731       
## cndtnps_:_1  0.401 -0.296 -0.528 -0.730  0.534
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: is_right
##                        Chisq Df Pr(>Chisq)    
## (Intercept)         153.1678  1  < 2.2e-16 ***
## condition             6.1347  2   0.046545 *  
## stim_set1             8.6260  1   0.003314 ** 
## condition:stim_set1  11.1517  2   0.003788 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(m, method="Wald")
##                                            2.5 %     97.5 %
## .sig01                                        NA         NA
## .sig02                                        NA         NA
## .sig03                                        NA         NA
## (Intercept)                            1.6867343  2.3215069
## conditionpassive                      -0.6945488  0.1579140
## conditionpassive_mismatched           -0.9496108 -0.1102718
## stim_set1                             -0.7652464 -0.1526811
## conditionpassive:stim_set1            -0.4624058  0.3740524
## conditionpassive_mismatched:stim_set1  0.1601700  0.9994877

Block 2

#recenter the block coding on Block 2 (stim_set2 = -1 for Block 1, 0 for Block 2)
#so the intercept and condition effects are estimated at Block 2
exp2_d <- exp2_d %>%
  mutate(
    stim_set2 = ifelse(stim_set==1,-1,0)
  )
#the yoked-id random intercept was removed from this model due to non-convergence
m <- glmer(is_right~condition*stim_set2+(1|subject)+(1|target_image), subset(exp2_d, trial_type=="test"),family="binomial")
#model summary: condition simple effects are estimated at Block 2 (where stim_set2 = 0)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition * stim_set2 + (1 | subject) + (1 | target_image)
##    Data: subset(exp2_d, trial_type == "test")
## 
##      AIC      BIC   logLik deviance df.resid 
##   3562.2   3611.9  -1773.1   3546.2     3688 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4862  0.2559  0.3697  0.5180  1.2884 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.800595 0.89476 
##  target_image (Intercept) 0.003617 0.06014 
## Number of obs: 3696, groups:  subject, 231; target_image, 8
## 
## Fixed effects:
##                                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                            1.54527    0.15117  10.222  < 2e-16 ***
## conditionpassive                      -0.31252    0.20525  -1.523  0.12785    
## conditionpassive_mismatched            0.04965    0.20946   0.237  0.81263    
## stim_set2                             -0.45897    0.15630  -2.936  0.00332 ** 
## conditionpassive:stim_set2            -0.04414    0.21343  -0.207  0.83615    
## conditionpassive_mismatched:stim_set2  0.57983    0.21416   2.707  0.00678 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnp cndtn_ stm_s2 cnd:_2
## conditnpssv -0.708                            
## cndtnpssv_m -0.693  0.509                     
## stim_set2    0.446 -0.331 -0.324              
## cndtnpss:_2 -0.330  0.454  0.238 -0.731       
## cndtnps_:_2 -0.324  0.241  0.483 -0.730  0.534
#Type III Wald chi-square tests for the Block-2-centered model
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: is_right
##                        Chisq Df Pr(>Chisq)    
## (Intercept)         104.4856  1  < 2.2e-16 ***
## condition             3.6992  2   0.157299    
## stim_set2             8.6225  1   0.003320 ** 
## condition:stim_set2  11.1466  2   0.003798 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Wald confidence intervals for the Block-2-centered fixed effects
confint(m, method="Wald")
##                                            2.5 %      97.5 %
## .sig01                                        NA          NA
## .sig02                                        NA          NA
## (Intercept)                            1.2489742  1.84156365
## conditionpassive                      -0.7148022  0.08975781
## conditionpassive_mismatched           -0.3608936  0.46019281
## stim_set2                             -0.7653115 -0.15261933
## conditionpassive:stim_set2            -0.4624522  0.37416748
## conditionpassive_mismatched:stim_set2  0.1600848  0.99956910

Three-way interaction with age

To test for possible differences in the effect of active learning across age, we fit a logistic mixed-effects model predicting participants’ trial-by-trial accuracy from condition (dummy coded), block (centered), age (centered), and their interactions. We included random intercepts for participants, items, and yoked pairings (i.e., observations from participants who were yoked together were treated as non-independent).

#three-way condition x block x age model; bobyqa optimizer used for convergence
#(named family= takes the family slot, so the positional glmerControl(...) binds to control=)
m <- glmer(is_right~condition*stim_set_c*age_c+(1|subject)+(1|target_image)+(1|yoked_id), subset(exp2_d, trial_type=="test"),glmerControl(optimizer="bobyqa"),family="binomial")
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ condition * stim_set_c * age_c + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: subset(exp2_d, trial_type == "test")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   3573.6   3666.8  -1771.8   3543.6     3681 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5834  0.2459  0.3741  0.5103  1.2735 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.780310 0.88335 
##  yoked_id     (Intercept) 0.021795 0.14763 
##  target_image (Intercept) 0.003327 0.05768 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                                               Estimate Std. Error z value
## (Intercept)                                   1.773416   0.135736  13.065
## conditionpassive                             -0.286209   0.181982  -1.573
## conditionpassive_mismatched                  -0.238873   0.182140  -1.311
## stim_set_c                                   -0.459235   0.156314  -2.938
## age_c                                        -0.084392   0.162870  -0.518
## conditionpassive:stim_set_c                  -0.048746   0.213655  -0.228
## conditionpassive_mismatched:stim_set_c        0.580706   0.214200   2.711
## conditionpassive:age_c                        0.056560   0.217643   0.260
## conditionpassive_mismatched:age_c             0.170228   0.222580   0.765
## stim_set_c:age_c                              0.001385   0.193209   0.007
## conditionpassive:stim_set_c:age_c             0.229222   0.257669   0.890
## conditionpassive_mismatched:stim_set_c:age_c  0.079857   0.261971   0.305
##                                              Pr(>|z|)    
## (Intercept)                                   < 2e-16 ***
## conditionpassive                              0.11578    
## conditionpassive_mismatched                   0.18969    
## stim_set_c                                    0.00330 ** 
## age_c                                         0.60435    
## conditionpassive:stim_set_c                   0.81953    
## conditionpassive_mismatched:stim_set_c        0.00671 ** 
## conditionpassive:age_c                        0.79496    
## conditionpassive_mismatched:age_c             0.44439    
## stim_set_c:age_c                              0.99428    
## conditionpassive:stim_set_c:age_c             0.37368    
## conditionpassive_mismatched:stim_set_c:age_c  0.76049    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) cndtnp cndtn_ stm_s_ age_c  cnd:__ cn_:__ cndt:_ cnd_:_
## conditnpssv -0.696                                                        
## cndtnpssv_m -0.697  0.516                                                 
## stim_set_c  -0.079  0.056  0.056                                          
## age_c        0.017 -0.013 -0.013  0.003                                   
## cndtnpss:__  0.053 -0.074 -0.040 -0.730 -0.002                            
## cndtnps_:__  0.059 -0.041 -0.032 -0.730 -0.002  0.533                     
## cndtnpssv:_ -0.018 -0.002  0.011 -0.001 -0.738  0.016  0.001              
## cndtnpss_:_ -0.009  0.009  0.006 -0.003 -0.721  0.001  0.007  0.539       
## stm_st_c:g_  0.002 -0.002 -0.002  0.004 -0.077 -0.002 -0.003  0.058  0.056
## cndtnp:__:_  0.002  0.015  0.001 -0.003  0.057 -0.015  0.003 -0.079 -0.042
## cndtn_:__:_ -0.001  0.001  0.006 -0.003  0.057  0.002  0.006 -0.043 -0.033
##             st__:_ c:__:_
## conditnpssv              
## cndtnpssv_m              
## stim_set_c               
## age_c                    
## cndtnpss:__              
## cndtnps_:__              
## cndtnpssv:_              
## cndtnpss_:_              
## stm_st_c:g_              
## cndtnp:__:_ -0.749       
## cndtn_:__:_ -0.737  0.553
#Type III Wald chi-square tests for the three-way age model
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: is_right
##                               Chisq Df Pr(>Chisq)    
## (Intercept)                170.6987  1  < 2.2e-16 ***
## condition                    2.8144  2   0.244828    
## stim_set_c                   8.6313  1   0.003304 ** 
## age_c                        0.2685  1   0.604350    
## condition:stim_set_c        11.2621  2   0.003585 ** 
## condition:age_c              0.6176  2   0.734331    
## stim_set_c:age_c             0.0001  1   0.994280    
## condition:stim_set_c:age_c   0.8417  2   0.656491    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Plot the condition effect for each age group

#subject-level mean test accuracy by block (stim_set), condition, and age group
exp2_test_subj_block_age_group <- exp2_d %>%
  filter(trial_type=="test") %>%
  group_by(subject,stim_set,condition,age_group) %>%
  summarize(
    mean_accuracy = mean(is_right,na.rm=TRUE)
  ) %>%
  mutate(
    age_group_f = factor(age_group, levels=c("three-year-olds","four-year-olds","five-year-olds")))

#condition-level means with 95% t-based confidence intervals
#(half-width = qt(0.975, N-1) * SE of the subject-level means)
exp2_test_summarized_block_age_group <- exp2_test_subj_block_age_group %>%
  group_by(condition,stim_set,age_group_f) %>%
  summarize(
    N=n(),
    avg_accuracy = mean(mean_accuracy),
    avg_accuracy_ci = qt(0.975, N-1)*sd(mean_accuracy,na.rm=TRUE)/sqrt(N),
    avg_accuracy_lower_ci = avg_accuracy - avg_accuracy_ci,
    avg_accuracy_upper_ci = avg_accuracy + avg_accuracy_ci
  )
#formatted summary table: drops the CI half-width column and prints "[lower, upper]"
exp2_test_summarized_block_age_group %>%
  ungroup() %>%
  select(-avg_accuracy_ci) %>%
  mutate(
    ci = str_c("[",round(avg_accuracy_lower_ci,3),", ", round(avg_accuracy_upper_ci,3),"]")) %>%
  select(stim_set,age_group_f,condition,N,avg_accuracy,ci) %>%
  kable(col.names=c("Block","Age Group","Condition", "N", "Average Accuracy","CI"),digits=3)
Block Age Group Condition N Average Accuracy CI
1 three-year-olds active 22 0.852 [0.787, 0.918]
1 four-year-olds active 25 0.850 [0.773, 0.927]
1 five-year-olds active 30 0.846 [0.775, 0.917]
2 three-year-olds active 22 0.801 [0.708, 0.894]
2 four-year-olds active 25 0.775 [0.68, 0.87]
2 five-year-olds active 30 0.792 [0.699, 0.884]
1 three-year-olds passive 22 0.847 [0.753, 0.94]
1 four-year-olds passive 25 0.785 [0.711, 0.859]
1 five-year-olds passive 30 0.825 [0.755, 0.895]
2 three-year-olds passive 22 0.727 [0.603, 0.851]
2 four-year-olds passive 25 0.740 [0.647, 0.833]
2 five-year-olds passive 30 0.754 [0.671, 0.838]
1 three-year-olds passive_mismatched 22 0.756 [0.643, 0.868]
1 four-year-olds passive_mismatched 25 0.815 [0.743, 0.887]
1 five-year-olds passive_mismatched 30 0.775 [0.694, 0.856]
2 three-year-olds passive_mismatched 22 0.801 [0.718, 0.884]
2 four-year-olds passive_mismatched 25 0.785 [0.713, 0.857]
2 five-year-olds passive_mismatched 30 0.812 [0.731, 0.894]
#condition effect by block and age group (points = condition means, bars = 95% CIs)
#NOTE(review): the violin and jitter layers use exp2_test_subj (defined earlier in the
#script); if that object lacks the faceting variables stim_set/age_group_f, ggplot will
#replicate those layers across every panel -- confirm exp2_test_subj_block_age_group
#wasn't intended here
ggplot(exp2_test_summarized_block_age_group,aes(condition,avg_accuracy,color=condition,fill=condition))+
  geom_half_violin(data= exp2_test_subj,aes(y=mean_accuracy),position = position_nudge(x = -.1, y = 0), width=0.8,adjust=1.5,trim = T, alpha = .8,color=NA,side="l")+
  geom_point(stat="identity",size=4,position = position_nudge(x = .2, y = 0))+
  geom_errorbar(aes(ymin=avg_accuracy_lower_ci,ymax=avg_accuracy_upper_ci),width=0,position = position_nudge(x = .2, y = 0))+
  geom_jitter(data= exp2_test_subj,aes(y=mean_accuracy),size=2,width=0.05,height=0.025,alpha=0.4,stroke=NA)+
  #geom_beeswarm(data= exp2_test_subj,aes(y=mean_accuracy),cex=0.5,alpha=0.5)+
  theme_cowplot()+
  theme(legend.position="none")+
  #dashed line marks chance performance (2AFC)
  geom_hline(yintercept=0.5,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  ylim(-0.04,1.04)+
  scale_x_discrete(
    breaks=c("active","passive","passive_mismatched"),
    labels=c("Active","Yoked\nPassive","Yoked\n Passive\n Mismatch"))+
  ylab("Word Learning Accuracy")+
  xlab("Condition")+
  facet_wrap(~stim_set+age_group_f,ncol=3)

ggsave(here(figure_path,"exp2_accuracy_by_age_group.png"),width=9,height=9,bg = "white")

Plot

#overall condition effect by block (uses exp2_test_summarized / exp2_test_subj, defined earlier)
#NOTE(review): facets on ~block here, while the age-group plot facets on stim_set --
#confirm exp2_test_summarized carries a `block` column
ggplot(exp2_test_summarized,aes(condition,avg_accuracy,color=condition,fill=condition))+
  geom_half_violin(data= exp2_test_subj,aes(y=mean_accuracy),position = position_nudge(x = -.1, y = 0), width=0.8,adjust=1.5,trim = T, alpha = .8,color=NA,side="l")+
  geom_point(stat="identity",size=4,position = position_nudge(x = .2, y = 0))+
  geom_errorbar(aes(ymin=avg_accuracy_lower_ci,ymax=avg_accuracy_upper_ci),width=0,position = position_nudge(x = .2, y = 0))+
  geom_jitter(data= exp2_test_subj,aes(y=mean_accuracy),size=2,width=0.05,height=0.025,alpha=0.4,stroke=NA)+
  #geom_beeswarm(data= exp2_test_subj,aes(y=mean_accuracy),cex=0.5,alpha=0.5)+
  theme_cowplot()+
  theme(legend.position="none")+
  #dashed line marks chance performance (2AFC)
  geom_hline(yintercept=0.5,linetype="dashed")+
  theme(axis.title = element_text(size=20),
        axis.text  = element_text(size=16))+
  scale_color_brewer(palette="Set1")+
  scale_fill_brewer(palette="Set1")+
  ylim(-0.04,1.04)+
  scale_x_discrete(
    breaks=c("active","passive","passive_mismatched"),
    labels=c("Active","Yoked\nPassive","Yoked\n Passive\n Mismatch"))+
  ylab("Word Learning Accuracy")+
  xlab("Condition")+
  facet_grid(~block)

ggsave(here(figure_path,"exp2_accuracy_by_block.png"),width=9,height=6,bg = "white")

Relationships between Exposure, Sampling, and Test

We conducted several exploratory analyses to investigate the nature of the relationship between participants’ training during the Exposure and Sampling Phases and their subsequent test accuracy.

#### combine test and choice data
#subject-by-item summaries of test accuracy and reaction time
#NOTE(review): acc/mean_rt/median_rt are computed without na.rm=TRUE, unlike the
#accuracy summaries elsewhere in the script -- confirm these columns contain no NAs
exp2_subj_accuracy <-  exp2_d %>%
  filter(trial_type=="test") %>%
  group_by(subject,yoked_id,age,age_group,condition,stim_set,target_kind, target_image) %>%
  summarize(acc=mean(is_right),
            mean_rt = mean(rt),
            median_rt = median(rt))

#overall item-level
#join in exposure/sampling counts; total_frequency = exposure-phase frequency + sampling choices
exp2_test_choice_type <-left_join(exp2_subj_accuracy, exp2_exposure_sampling_complete) %>%
  mutate(
    total_frequency = exposure_frequency + choice_num
  )

#trial-by-trial level
exp2_test_d <- exp2_d %>%
  filter(trial_type=="test") %>%
  select(subject,trial_type,trial_num,yoked_id,age,age_group,condition,stim_set,stim_set_c,stim_set1,stim_set2,target_kind, target_image,is_right)

#same join at the trial level, used for the frequency models below
exp2_test_d_choice_type <-exp2_test_d %>%
  left_join(exp2_exposure_sampling_complete) %>%
  mutate(
    total_frequency = exposure_frequency + choice_num
  )

Frequency of Experience and Test Accuracy

Participants’ frequency of experience during the Exposure and Sampling Phases predicted test accuracy.

Overall Frequency and its Relationship to Test

First, we asked whether the combined frequency of children’s exposure to each object-label association during the Exposure Phase and the Sampling Phase would predict their later test accuracy. We fit a logistic mixed-effects model predicting children’s trial-by-trial accuracy on a given item from the total frequency with which they experienced that item during the Exposure Phase and the Sampling Phase combined (total frequency), experiment block, and their interaction. We included random intercepts for participants, items, and yoked pairings. Overall, children were more accurate on items they experienced more frequently during training.

## total frequency
#marginal interaction with block (p = .058 in the summary below); follow-up
#recentered models for each block are fit further down
m <- glmer(is_right ~total_frequency*stim_set_c+(1|subject)+(1|target_image)+(1|yoked_id),data=exp2_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ total_frequency * stim_set_c + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: exp2_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##   3559.4   3602.9  -1772.7   3545.4     3689 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6502  0.2689  0.3712  0.5124  1.3356 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.801569 0.89530 
##  yoked_id     (Intercept) 0.014313 0.11964 
##  target_image (Intercept) 0.003251 0.05702 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                            Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 1.30959    0.11358  11.530  < 2e-16 ***
## total_frequency             0.08315    0.02401   3.464 0.000533 ***
## stim_set_c                  0.02909    0.18233   0.160 0.873246    
## total_frequency:stim_set_c -0.09092    0.04797  -1.895 0.058058 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ttl_fr stm_s_
## totl_frqncy -0.699              
## stim_set_c  -0.046  0.070       
## ttl_frqn:__  0.052 -0.106 -0.881
#Wald confidence intervals for the total-frequency model
confint(m, method="Wald")
##                                  2.5 %      97.5 %
## .sig01                              NA          NA
## .sig02                              NA          NA
## .sig03                              NA          NA
## (Intercept)                 1.08696772 1.532209575
## total_frequency             0.03609887 0.130202207
## stim_set_c                 -0.32827750 0.386455278
## total_frequency:stim_set_c -0.18494935 0.003104095

Below is a quick plot visualizing this effect.

#item-level
# total frequency
#quick visualization of accuracy against total experienced frequency, by condition and block
#(geom_beeswarm comes from the ggbeeswarm package)
ggplot(exp2_test_d_choice_type,aes(total_frequency,is_right,color=condition))+
  geom_beeswarm(aes(group=total_frequency), cex=0.02,alpha=0.5)+
  geom_smooth(method="lm")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1), limits=c(-0.1,1.1))+
  facet_wrap(~stim_set)

There was a marginal interaction between total frequency and experiment block, driven by the fact that the total frequency effect was robust in Block 1 but not in Block 2.

Block 1

Results for the logistic mixed-effects model, recentered on Block 1 to estimate the Block 1 total frequency effect.

#Block 1
#stim_set1 coding (defined earlier in the script) centers the model on Block 1,
#so total_frequency here is the Block 1 simple effect
m <- glmer(is_right ~total_frequency*stim_set1+(1|subject)+(1|target_image)+(1|yoked_id),data=exp2_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ total_frequency * stim_set1 + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: exp2_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##   3559.4   3602.9  -1772.7   3545.4     3689 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6502  0.2689  0.3712  0.5124  1.3356 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.801570 0.89530 
##  yoked_id     (Intercept) 0.014314 0.11964 
##  target_image (Intercept) 0.003251 0.05702 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.29505    0.14887   8.699  < 2e-16 ***
## total_frequency            0.12861    0.03568   3.604 0.000313 ***
## stim_set1                  0.02909    0.18233   0.160 0.873260    
## total_frequency:stim_set1 -0.09092    0.04797  -1.895 0.058056 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ttl_fr stm_s1
## totl_frqncy -0.777              
## stim_set1   -0.647  0.640       
## ttl_frqn:_1  0.580 -0.743 -0.881
#Wald confidence intervals for the Block-1-centered total-frequency model
confint(m, method="Wald")
##                                 2.5 %      97.5 %
## .sig01                             NA          NA
## .sig02                             NA          NA
## .sig03                             NA          NA
## (Intercept)                1.00327486 1.586822202
## total_frequency            0.05867278 0.198550543
## stim_set1                 -0.32827617 0.386446736
## total_frequency:stim_set1 -0.18494817 0.003103281
Block 2

Results for the logistic mixed-effects model, recentered on Block 2 to estimate the Block 2 total frequency effect.

#Block 2
#stim_set2 coding centers the model on Block 2, so total_frequency here is the
#Block 2 simple effect (not significant, p = .24 in the summary below)
m <- glmer(is_right ~total_frequency*stim_set2+(1|subject)+(1|target_image)+(1|yoked_id),data=exp2_test_d_choice_type,family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ total_frequency * stim_set2 + (1 | subject) + (1 |  
##     target_image) + (1 | yoked_id)
##    Data: exp2_test_d_choice_type
## 
##      AIC      BIC   logLik deviance df.resid 
##   3559.4   3602.9  -1772.7   3545.4     3689 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6502  0.2689  0.3712  0.5124  1.3356 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.801566 0.89530 
##  yoked_id     (Intercept) 0.014313 0.11964 
##  target_image (Intercept) 0.003251 0.05702 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                1.32413    0.14235   9.302   <2e-16 ***
## total_frequency            0.03769    0.03209   1.174   0.2402    
## stim_set2                  0.02908    0.18233   0.160   0.8733    
## total_frequency:stim_set2 -0.09092    0.04797  -1.895   0.0581 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) ttl_fr stm_s2
## totl_frqncy -0.774              
## stim_set2    0.604 -0.606       
## ttl_frqn:_2 -0.523  0.668 -0.881
#Wald confidence intervals for the Block-2-centered total-frequency model
confint(m, method="Wald")
##                                2.5 %      97.5 %
## .sig01                            NA          NA
## .sig02                            NA          NA
## .sig03                            NA          NA
## (Intercept)                1.0451367 1.603121045
## total_frequency           -0.0252106 0.100590948
## stim_set2                 -0.3282727 0.386435569
## total_frequency:stim_set2 -0.1849447 0.003103576

Exposure and Sampling Frequency and its Relationship to Test

Next, we aimed to tease apart the specific effect of frequency within the Exposure Phase and frequency within the Sampling Phase on test accuracy. We fit a logistic mixed-effects model predicting trial-by-trial test accuracy from the interaction between Exposure Phase frequency and experiment block (centered), and the interaction between Sampling Phase frequency and experiment block, as well as all lower-order effects. We included random intercepts for participants, items, and yoked pairings. Both exposure frequency and sampling frequency predicted accuracy at test while controlling for each other.

#exposure frequency and choices
#check interaction with block
#separates exposure-phase frequency from sampling-phase choices as predictors,
#each interacted with centered block; bobyqa optimizer aids convergence
m <- glmer(is_right ~(exposure_frequency+choice_num)*stim_set_c+(1|subject)+(1|target_image)+(1|yoked_id),data=exp2_test_d_choice_type,family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ (exposure_frequency + choice_num) * stim_set_c + (1 |  
##     subject) + (1 | target_image) + (1 | yoked_id)
##    Data: exp2_test_d_choice_type
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   3562.9   3618.8  -1772.4   3544.9     3687 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.5864  0.2673  0.3703  0.5122  1.3669 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.802212 0.89566 
##  yoked_id     (Intercept) 0.014547 0.12061 
##  target_image (Intercept) 0.003859 0.06212 
## Number of obs: 3696, groups:  subject, 231; yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                    1.25947    0.13280   9.484  < 2e-16 ***
## exposure_frequency             0.07985    0.02439   3.274  0.00106 ** 
## choice_num                     0.12132    0.05752   2.109  0.03492 *  
## stim_set_c                     0.02812    0.22887   0.123  0.90222    
## exposure_frequency:stim_set_c -0.09093    0.04874  -1.866  0.06211 .  
## choice_num:stim_set_c         -0.09047    0.11545  -0.784  0.43324    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ chc_nm stm_s_ ex_:__
## expsr_frqnc -0.492                            
## choice_num  -0.717  0.243                     
## stim_set_c  -0.060  0.045  0.072              
## expsr_fr:__  0.033 -0.104 -0.026 -0.578       
## chc_nm:st__  0.064 -0.028 -0.095 -0.842  0.240
#Wald confidence intervals for the exposure + sampling frequency model
confint(m, method="Wald")
##                                      2.5 %      97.5 %
## .sig01                                  NA          NA
## .sig02                                  NA          NA
## .sig03                                  NA          NA
## (Intercept)                    0.999190883 1.519744899
## exposure_frequency             0.032052046 0.127654984
## choice_num                     0.008586385 0.234055555
## stim_set_c                    -0.420465450 0.476703381
## exposure_frequency:stim_set_c -0.186461082 0.004604027
## choice_num:stim_set_c         -0.316750452 0.135804212
#vif(m) #use this command to double-check variance inflation

Below are two quick plots visualizing these effects.

#sampling choices
#quick visualization: accuracy vs. number of sampling-phase choices, by condition and block
ggplot(exp2_test_d_choice_type,aes(choice_num,is_right,color=condition))+
  geom_smooth(method="lm")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1), limits=c(-0.1,1.1))+
  facet_wrap(~stim_set)

#exposure frequency
#quick visualization: accuracy vs. exposure-phase frequency, by condition and block
ggplot(exp2_test_d_choice_type,aes(exposure_frequency,is_right,color=condition))+
  geom_smooth(method="lm")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1), limits=c(-0.1,1.1))+
  facet_wrap(~stim_set)

Effects for Block 1 by Condition

Given that we observed condition effects only in Block 1, we investigated the effects of exposure and sampling frequency specifically in Block 1 for each condition separately. The effect of exposure frequency was largely robust across conditions. However, sampling frequency predicted test accuracy only in the Active condition, not in either of the Yoked Passive conditions.

#Block 1
#active condition only
#simple-effects model fit on the Block 1 / Active subset; no yoked_id intercept
#since only one member of each yoked set appears in a single condition's subset
m <- glmer(is_right ~exposure_frequency+choice_num+(1|subject)+(1|target_image),data=filter(exp2_test_d_choice_type,stim_set==1 & condition=="active"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ exposure_frequency + choice_num + (1 | subject) +  
##     (1 | target_image)
##    Data: filter(exp2_test_d_choice_type, stim_set == 1 & condition ==  
##     "active")
## 
##      AIC      BIC   logLik deviance df.resid 
##    504.0    526.1   -247.0    494.0      611 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7324  0.2226  0.2783  0.4070  1.1667 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 1.27716  1.1301  
##  target_image (Intercept) 0.08226  0.2868  
## Number of obs: 616, groups:  subject, 77; target_image, 8
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         1.35353    0.39159   3.457 0.000547 ***
## exposure_frequency  0.13988    0.07378   1.896 0.057978 .  
## choice_num          0.37273    0.17523   2.127 0.033414 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_
## expsr_frqnc -0.545       
## choice_num  -0.718  0.367
#Wald confidence intervals for the Active-condition model
confint(m, method="Wald")
##                           2.5 %    97.5 %
## .sig01                       NA        NA
## .sig02                       NA        NA
## (Intercept)         0.586039608 2.1210291
## exposure_frequency -0.004729336 0.2844927
## choice_num          0.029283573 0.7161757
#collect fixed-effect estimates from the Active-condition model into a tidy frame
#for the coefficient plot below; N = number of subjects (first entry of ngrps)
coefs_active <- data.frame(
  condition="Active",
  N=summary(m)$ngrps[1],
  predictor=row.names(summary(m)$coefficients),
  beta=summary(m)$coefficients[,1],
  se=summary(m)$coefficients[,2],
  z=summary(m)$coefficients[,3],
  p=summary(m)$coefficients[,4]
)
#passive mismatched condition only
#same simple-effects model on the Block 1 / Yoked Passive Mismatched subset
m <- glmer(is_right ~exposure_frequency+choice_num+(1|subject)+(1|target_image),data=filter(exp2_test_d_choice_type,stim_set==1 & condition=="passive_mismatched"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ exposure_frequency + choice_num + (1 | subject) +  
##     (1 | target_image)
##    Data: filter(exp2_test_d_choice_type, stim_set == 1 & condition ==  
##     "passive_mismatched")
## 
##      AIC      BIC   logLik deviance df.resid 
##    618.1    640.2   -304.0    608.1      611 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0098  0.2425  0.3275  0.5125  1.2538 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 1.251    1.118   
##  target_image (Intercept) 0.000    0.000   
## Number of obs: 616, groups:  subject, 77; target_image, 8
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         1.23734    0.30278   4.087 4.38e-05 ***
## exposure_frequency  0.11919    0.05975   1.995   0.0461 *  
## choice_num          0.08790    0.14469   0.608   0.5435    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_
## expsr_frqnc -0.356       
## choice_num  -0.708  0.024
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
#Wald CIs; note the model above had a singular fit (by-item variance estimated at 0)
confint(m, method="Wald")
##                           2.5 %    97.5 %
## .sig01                       NA        NA
## .sig02                       NA        NA
## (Intercept)         0.643909868 1.8307785
## exposure_frequency  0.002077002 0.2363071
## choice_num         -0.195684680 0.3714821
#tidy coefficient frame for the Yoked Passive Mismatched model (parallel to coefs_active)
coefs_passive_mis <- data.frame(
  condition="Yoked Passive\nMismatch",
  N=summary(m)$ngrps[1],
  predictor=row.names(summary(m)$coefficients),
  beta=summary(m)$coefficients[,1],
  se=summary(m)$coefficients[,2],
  z=summary(m)$coefficients[,3],
  p=summary(m)$coefficients[,4]
)
#passive condition
#same simple-effects model on the Block 1 / Yoked Passive subset
m <- glmer(is_right ~exposure_frequency+choice_num+(1|subject)+(1|target_image),data=filter(exp2_test_d_choice_type,stim_set==1 & condition=="passive"),family=binomial)
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ exposure_frequency + choice_num + (1 | subject) +  
##     (1 | target_image)
##    Data: filter(exp2_test_d_choice_type, stim_set == 1 & condition ==  
##     "passive")
## 
##      AIC      BIC   logLik deviance df.resid 
##    565.5    587.6   -277.8    555.5      611 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2542  0.2475  0.3220  0.4477  1.4918 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 1.06620  1.0326  
##  target_image (Intercept) 0.02276  0.1509  
## Number of obs: 616, groups:  subject, 77; target_image, 8
## 
## Fixed effects:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         1.30987    0.35529   3.687 0.000227 ***
## exposure_frequency  0.16041    0.06947   2.309 0.020931 *  
## choice_num          0.13837    0.15853   0.873 0.382743    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_
## expsr_frqnc -0.561       
## choice_num  -0.781  0.372
#Wald confidence intervals for the Yoked Passive model
confint(m, method="Wald")
##                          2.5 %    97.5 %
## .sig01                      NA        NA
## .sig02                      NA        NA
## (Intercept)         0.61351913 2.0062172
## exposure_frequency  0.02426213 0.2965666
## choice_num         -0.17233719 0.4490796
#tidy coefficient frame for the Yoked Passive model (parallel to coefs_active)
coefs_passive <- data.frame(
  condition="Yoked Passive",
  N=summary(m)$ngrps[1],
  predictor=row.names(summary(m)$coefficients),
  beta=summary(m)$coefficients[,1],
  se=summary(m)$coefficients[,2],
  z=summary(m)$coefficients[,3],
  p=summary(m)$coefficients[,4]
)

#combine coefficient estimates from the three condition-specific models
coefs <- bind_rows(coefs_active, coefs_passive_mis, coefs_passive) %>%
  mutate(
    # 95% CI half-width using a t-distribution with N-1 degrees of freedom
    # (N = number of subjects in each model)
    ci = qt(0.975, N - 1) * se,
    lower_ci = beta - ci,
    upper_ci = beta + ci
  )
# Fix the ordering of predictors and conditions for plotting.
# NOTE(review): the original predictor level list also contained the condition
# label "Yoked Passive\nMismatch", which is not a predictor name and can never
# occur in this column (apparent copy-paste from the condition levels below);
# it has been removed. Unused factor levels do not affect the plot.
coefs$predictor <- factor(coefs$predictor, levels = c("(Intercept)", "choice_num", "exposure_frequency"))
coefs$condition <- factor(coefs$condition, levels = c("Active", "Yoked Passive", "Yoked Passive\nMismatch"))
# Plot betas (with 95% CIs) for the two frequency predictors, one facet per
# condition; intercepts are omitted.
ggplot(filter(coefs, predictor %in% c("choice_num","exposure_frequency")),aes(predictor,beta, color=condition))+
  geom_point(size=5)+
  geom_errorbar(aes(ymin=lower_ci,ymax=upper_ci),width=0)+
  geom_hline(yintercept=0, linetype="dashed")+
  facet_wrap(~condition)+
  coord_flip()+
  scale_x_discrete(
    breaks=c("exposure_frequency","choice_num"),
    labels=c("Frequency\nExposure Phase","Frequency\nSampling Phase")
  )+
  ylab("Model Beta Coefficient")+
  xlab("Predictor Variable")+
  scale_color_brewer(palette="Set1")+
  theme(axis.title = element_text(size=20),
        axis.text.y=element_text(size=16),
        strip.text=element_text(size=16),
        legend.position="none")

ggsave(here(figure_path,"predicting_test_accuracy_from_exposure_block1.png"),width=9,height=6)
Interaction with Condition

Note that the pattern of effects across conditions above should be interpreted with caution, given that we did not observe a significant interaction between exposure frequency and condition or sampling frequency and condition.

#note no interaction
#had to remove the by-item random intercept due to non-convergence
# Test whether the frequency effects differ by condition: accuracy predicted by
# the interaction of (exposure frequency + sampling frequency) with condition,
# fit across all three conditions in block 1, with a by-subject random
# intercept only (bobyqa optimizer for convergence).
m <- glmer(is_right ~(exposure_frequency+choice_num)*condition+(1|subject),data=filter(exp2_test_d_choice_type,stim_set==1),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ (exposure_frequency + choice_num) * condition + (1 |  
##     subject)
##    Data: filter(exp2_test_d_choice_type, stim_set == 1)
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1678.8   1734.1   -829.4   1658.8     1838 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7935  0.2288  0.3157  0.4355  1.5721 
## 
## Random effects:
##  Groups  Name        Variance Std.Dev.
##  subject (Intercept) 1.171    1.082   
## Number of obs: 1848, groups:  subject, 231
## 
## Fixed effects:
##                                                 Estimate Std. Error z value
## (Intercept)                                     1.328899   0.364052   3.650
## exposure_frequency                              0.136383   0.072815   1.873
## choice_num                                      0.353397   0.170281   2.075
## conditionpassive                               -0.007004   0.502535  -0.014
## conditionpassive_mismatched                    -0.103543   0.465536  -0.222
## exposure_frequency:conditionpassive             0.024298   0.100368   0.242
## exposure_frequency:conditionpassive_mismatched -0.017983   0.093922  -0.191
## choice_num:conditionpassive                    -0.211575   0.232432  -0.910
## choice_num:conditionpassive_mismatched         -0.266162   0.222965  -1.194
##                                                Pr(>|z|)    
## (Intercept)                                    0.000262 ***
## exposure_frequency                             0.061068 .  
## choice_num                                     0.037952 *  
## conditionpassive                               0.988880    
## conditionpassive_mismatched                    0.823989    
## exposure_frequency:conditionpassive            0.808713    
## exposure_frequency:conditionpassive_mismatched 0.848161    
## choice_num:conditionpassive                    0.362683    
## choice_num:conditionpassive_mismatched         0.232579    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ chc_nm cndtnp cndtn_ exps_: exp_:_ chc_n:
## expsr_frqnc -0.598                                                 
## choice_num  -0.786  0.366                                          
## conditnpssv -0.716  0.437  0.573                                   
## cndtnpssv_m -0.773  0.472  0.619  0.559                            
## expsr_frqn:  0.438 -0.724 -0.264 -0.603 -0.342                     
## expsr_frq:_  0.467 -0.774 -0.283 -0.339 -0.518  0.561              
## chc_nm:cndt  0.579 -0.267 -0.731 -0.802 -0.454  0.369  0.207       
## chc_nm:cnd_  0.602 -0.279 -0.763 -0.438 -0.771  0.202  0.225  0.558
# Type III Wald chi-square tests: neither frequency-by-condition interaction
# term is significant (exposure_frequency:condition p = .898,
# choice_num:condition p = .470).
Anova(m,type="III")
## Analysis of Deviance Table (Type III Wald chisquare tests)
## 
## Response: is_right
##                                Chisq Df Pr(>Chisq)    
## (Intercept)                  13.3247  1  0.0002619 ***
## exposure_frequency            3.5081  1  0.0610677 .  
## choice_num                    4.3072  1  0.0379517 *  
## condition                     0.0672  2  0.9669468    
## exposure_frequency:condition  0.2150  2  0.8980639    
## choice_num:condition          1.5113  2  0.4697157    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Exposure Difference Analysis

Next, we sought to understand the relationship between the decrease in accuracy in the Yoked Passive Mismatch condition and the degree of mismatch in exposure frequency for yoked participants in the passive and active conditions. Here, we exploited variation across yoked participant pairs in the magnitude of difference between word learning experiences during the Exposure Phase due to our randomization procedure. Because the frequency of experiencing each object-label association was randomized in Yoked Passive Mismatch participants, yoked participant pairs had similar learning experiences relative to their Active yoked counterparts for some items during the Exposure Phase (similar frequencies of seeing each object-label association), and dramatically mismatched learning experiences for others (e.g., an active participant saw an object-label association five times that a yoked mismatch participant saw just once or never). On a learning-dependent account of why active control benefits word learning, a greater degree of mismatch in initial learning experience predicts a larger difference in word learning at test. Did participants in the Yoked Passive Mismatch condition perform better on items they had more exposure to than their yoked counterpart in the Active condition and worse on items they had less exposure to than their yoked counterpart?

To investigate this question, we fit a logistic mixed-effects model predicting the trial-by-trial accuracy of participants in the Yoked Passive Mismatch condition from the difference in exposure frequency for a given item (exposure frequency in the Yoked Passive Mismatch condition – exposure frequency in the Active condition for a given yoked pair and item), experiment block (centered) and their interaction. We included random intercepts for participants and items. Accuracy in the Yoked Passive Mismatch condition increased with larger exposure frequencies relative to the Active condition.

#interaction between exposure difference and block
# Predict trial-level accuracy in the Yoked Passive Mismatch condition from
# the item-wise difference in exposure frequency relative to the Active yoked
# partner (exposure_difference = mismatch exposure - active exposure),
# experiment block (stim_set_c, centered), and their interaction; random
# intercepts for subject and item (bobyqa optimizer).
m <- glmer(is_right ~exposure_difference*stim_set_c+(1|subject)+(1|target_image),data=filter(exp2_test_d_choice_type, condition=="passive_mismatched"),family=binomial,glmerControl(optimizer="bobyqa"))
summary(m)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: is_right ~ exposure_difference * stim_set_c + (1 | subject) +  
##     (1 | target_image)
##    Data: filter(exp2_test_d_choice_type, condition == "passive_mismatched")
## Control: glmerControl(optimizer = "bobyqa")
## 
##      AIC      BIC   logLik deviance df.resid 
##   1222.3   1253.0   -605.2   1210.3     1226 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0025  0.2899  0.4022  0.5200  1.2369 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  subject      (Intercept) 0.6709   0.8191  
##  target_image (Intercept) 0.0000   0.0000  
## Number of obs: 1232, groups:  subject, 77; target_image, 8
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     1.51946    0.12487  12.168   <2e-16 ***
## exposure_difference             0.05579    0.02749   2.029   0.0424 *  
## stim_set_c                      0.12711    0.14672   0.866   0.3863    
## exposure_difference:stim_set_c  0.03006    0.05493   0.547   0.5842    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ stm_s_
## expsr_dffrn  0.056              
## stim_set_c   0.025  0.026       
## expsr_df:__  0.018 -0.018  0.073
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# Wald 95% CIs for the fixed effects; random-effect SD rows are NA because
# method="Wald" does not profile variance components. Note the singular fit
# above reflects the zero by-item variance estimate.
confint(m,method="Wald")
##                                       2.5 %    97.5 %
## .sig01                                   NA        NA
## .sig02                                   NA        NA
## (Intercept)                     1.274707351 1.7642067
## exposure_difference             0.001908501 0.1096720
## stim_set_c                     -0.160458033 0.4146703
## exposure_difference:stim_set_c -0.077600783 0.1377287
#quick overview plot
# Beeswarm of trial-level accuracy at each exposure-difference value with a
# loess smooth, split by block (stim_set).
ggplot(filter(exp2_test_d_choice_type,condition=="passive_mismatched"),aes(exposure_difference,is_right))+
  geom_beeswarm(aes(group=exposure_difference), cex=0.02,alpha=0.01)+
  geom_smooth(method="loess")+
  scale_y_continuous(breaks=c(0,0.2,0.4,0.6,0.8,1), limits=c(-0.1,1.1))+
  facet_wrap(~stim_set)

Robustness Analysis: Predicting item-level accuracy differences

We also conducted a robustness analysis in which we investigated the difference in accuracy between yoked pairings in the Active and Yoked Passive Mismatch condition, depending on differences in exposure frequency.

#"landscape" analysis comparing active vs. passive mismatched
active <- exp2_test_d_choice_type %>%
  filter(condition %in% c("active")) %>%
  group_by(yoked_id,stim_set,yoked_active_kind,target_image,choice_num,exposure_frequency) %>%
  dplyr::summarise(accuracy=mean(is_right)) %>%
  rename(active_accuracy=accuracy, active_exposure_frequency=exposure_frequency)

passive_m <- exp2_test_d_choice_type %>%
  filter(condition %in% c("passive_mismatched")) %>%
  group_by(yoked_id,stim_set,exposure_kind,yoked_active_kind,target_image,choice_num,exposure_frequency) %>%
  dplyr::summarise(accuracy=mean(is_right))  %>%
  rename(passive_m_accuracy=accuracy,passive_m_exposure_frequency=exposure_frequency)

yoked_item_accuracy <- active %>%
  full_join(passive_m) %>%
  mutate(
    accuracy_difference = active_accuracy - passive_m_accuracy,
    exposure_diff_categorical = case_when(
      active_exposure_frequency - passive_m_exposure_frequency == 0 ~ 0,
      active_exposure_frequency - passive_m_exposure_frequency > 0 ~ 1,
      active_exposure_frequency - passive_m_exposure_frequency < 0 ~ -1
    ),
    exposure_difference=active_exposure_frequency - passive_m_exposure_frequency) %>%
  mutate(stim_set_c=stim_set-1.5)

To test whether exposure difference predicted item-level accuracy difference, we fit a linear mixed-effects model predicting difference in test accuracy from difference in exposure frequency for a given item, experiment block (centered), and their interaction, including random intercepts for yoked pairing and item. Exposure difference was marginally related to difference in accuracy.

#interaction
# Linear mixed-effects model: item-level accuracy difference (Active - Yoked
# Passive Mismatch) predicted by the item's exposure-frequency difference,
# block (centered), and their interaction, with random intercepts for yoked
# pairing and item (bobyqa optimizer).
m <- lmer(accuracy_difference~exposure_difference*stim_set_c+(1|yoked_id)+(1|target_image),data=yoked_item_accuracy,control=lmerControl(optimizer="bobyqa"))
summary(m)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: accuracy_difference ~ exposure_difference * stim_set_c + (1 |  
##     yoked_id) + (1 | target_image)
##    Data: yoked_item_accuracy
## Control: lmerControl(optimizer = "bobyqa")
## 
## REML criterion at convergence: 719.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.83917 -0.42288 -0.04839  0.42527  2.95726 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  yoked_id     (Intercept) 0.03274  0.1809  
##  target_image (Intercept) 0.00000  0.0000  
##  Residual                 0.16117  0.4015  
## Number of obs: 616, groups:  yoked_id, 77; target_image, 8
## 
## Fixed effects:
##                                  Estimate Std. Error         df t value
## (Intercept)                      0.027597   0.026208  75.999996   1.053
## exposure_difference              0.010845   0.006049 536.000002   1.793
## stim_set_c                      -0.077922   0.032350 536.000002  -2.409
## exposure_difference:stim_set_c   0.004400   0.012097 536.000002   0.364
##                                Pr(>|t|)  
## (Intercept)                      0.2957  
## exposure_difference              0.0735 .
## stim_set_c                       0.0163 *
## exposure_difference:stim_set_c   0.7162  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) expsr_ stm_s_
## expsr_dffrn  0.000              
## stim_set_c   0.000  0.000       
## expsr_df:__  0.000 -0.058  0.000
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')
# quick plot
# Jittered scatter of the item-level accuracy difference against the
# exposure-frequency difference, with a linear fit.
ggplot(yoked_item_accuracy, aes(exposure_difference,accuracy_difference)) +
  geom_jitter()+
  geom_smooth(method="lm")

Fancier visualization of the robustness analysis

The figure below shows item-specific differences in accuracy (Active – Passive) for yoked counterparts in the Active and Yoked Passive Mismatch Condition, depending on exposure frequency.

#summarize across items
#across blocks
# Average the item-level accuracy difference (Active - Passive Mismatch)
# within each cell of the exposure-frequency "landscape": Active exposure
# category crossed with Yoked Passive Mismatch exposure category.
summarized_yoked_item_accuracy <- yoked_item_accuracy %>%
  group_by(yoked_active_kind,exposure_kind) %>%
  summarize(N=n(),avg_acc_diff=mean(accuracy_difference)) %>%
  mutate(
    # Relabel the categorical frequency codes with their exposure counts.
    yoked_active_kind_f = factor(yoked_active_kind, levels=c("no","low","medium","high"), labels=c("0 Exposures","1 Exposure","2 Exposures","5 Exposures")),
    exposure_kind_f = factor(exposure_kind, levels=c("no","low","medium","high"), labels=c("0 Exposures","1 Exposure","2 Exposures","5 Exposures")))

# Heatmap of the average accuracy difference across the landscape.
# `linewidth` replaces the `size` aesthetic for tile border width
# (deprecated for lines/borders since ggplot2 3.4).
ggplot(summarized_yoked_item_accuracy, aes(yoked_active_kind_f,  exposure_kind_f)) +
  geom_tile(aes(fill = avg_acc_diff),color="white",linewidth=0.9)+
  scale_fill_viridis(name="Accuracy Difference\n(Active-Passive)",option="B")+
  xlab("Exposure Frequency in Active Condition")+
  ylab("Exposure Frequency in corresponding\nYoked Passive Mismatch Condition")

ggsave(here(figure_path,"active_vs_yoked_passive_accuracy_landscape_exposure_frequency.png"),width=9,height=6)

The x-axis corresponds to the frequency of exposure for a given item and participant in the Active condition. The y-axis corresponds to the frequency of exposure for the corresponding item and yoked participant in the Yoked Passive Mismatch condition. The color fill represents the average difference in accuracy between the Active condition and the Yoked Passive Mismatch condition for a given item and yoked pairing. Darker colors represent a greater advantage for performance in the Yoked Passive Mismatch condition, and lighter/yellower colors represent an advantage for performance in the Active condition.

Session Info

# Record the R version, platform, locale, and package versions used for this
# analysis, for reproducibility.
sessionInfo()
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Sonoma 14.8.2
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/Los_Angeles
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] broom.mixed_0.2.9.6 lmerTest_3.1-3      gghalves_0.1.4     
##  [4] ggstance_0.3.7      ggbeeswarm_0.7.2    viridis_0.6.5      
##  [7] viridisLite_0.4.2   AICcmodavg_2.3-3    car_3.1-2          
## [10] carData_3.0-5       sciplot_1.2-0       cowplot_1.1.3      
## [13] lme4_1.1-35.3       Matrix_1.6-5        lubridate_1.9.3    
## [16] forcats_1.0.0       stringr_1.5.1       purrr_1.0.4        
## [19] readr_2.1.5         tidyr_1.3.1         tibble_3.2.1       
## [22] ggplot2_3.5.2       tidyverse_2.0.0     dplyr_1.1.4        
## [25] here_1.0.1          knitr_1.49         
## 
## loaded via a namespace (and not attached):
##  [1] tidyselect_1.2.1    vipor_0.4.7         farver_2.1.1       
##  [4] fastmap_1.1.1       digest_0.6.35       timechange_0.3.0   
##  [7] lifecycle_1.0.4     survival_3.5-8      magrittr_2.0.3     
## [10] compiler_4.3.2      rlang_1.1.6         sass_0.4.9         
## [13] tools_4.3.2         utf8_1.2.4          yaml_2.3.8         
## [16] labeling_0.4.3      bit_4.0.5           plyr_1.8.9         
## [19] RColorBrewer_1.1-3  abind_1.4-5         withr_3.0.2        
## [22] numDeriv_2016.8-1.1 grid_4.3.2          stats4_4.3.2       
## [25] fansi_1.0.6         unmarked_1.4.1      xtable_1.8-4       
## [28] future_1.33.2       globals_0.16.3      scales_1.4.0       
## [31] MASS_7.3-60.0.1     cli_3.6.2           crayon_1.5.2       
## [34] rmarkdown_2.26      ragg_1.3.0          generics_0.1.3     
## [37] rstudioapi_0.16.0   tzdb_0.4.0          minqa_1.2.6        
## [40] cachem_1.0.8        splines_4.3.2       parallel_4.3.2     
## [43] vctrs_0.6.5         boot_1.3-30         jsonlite_1.8.8     
## [46] VGAM_1.1-10         hms_1.1.3           bit64_4.0.5        
## [49] beeswarm_0.4.0      listenv_0.9.1       systemfonts_1.0.6  
## [52] jquerylib_0.1.4     glue_1.8.0          parallelly_1.37.1  
## [55] nloptr_2.0.3        codetools_0.2-20    stringi_1.8.3      
## [58] gtable_0.3.6        pillar_1.9.0        furrr_0.3.1        
## [61] htmltools_0.5.8.1   R6_2.5.1            textshaping_0.3.7  
## [64] rprojroot_2.0.4     vroom_1.6.5         evaluate_0.23      
## [67] lattice_0.22-6      backports_1.4.1     broom_1.0.5        
## [70] bslib_0.7.0         Rcpp_1.0.12         gridExtra_2.3      
## [73] nlme_3.1-164        mgcv_1.9-1          xfun_0.52          
## [76] pkgconfig_2.0.3